Load Libraries
library(tidyverse)
library(caret)
library(MASS)
library(car)
library(moments)
Loading the Cleaned Datasets
# Income-group credit data; column types are guessed by readr.
income_cleaned <- read_csv(file = 'NYS_Corp_Tax_Credit_data/income_cleaned.csv')
Rows: 1921 Columns: 6── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Name, Group
dbl (4): Year, Num, Amount, Avg
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Drop the total credit amount so it is not used as a predictor of Avg.
# select() is namespace-qualified because library(MASS) is attached after
# tidyverse and MASS::select would otherwise mask dplyr::select.
income_cleaned <- income_cleaned %>% dplyr::select(-Amount)
# Industry-group credit data; column types are guessed by readr.
industry_cleaned <- read_csv(file = 'NYS_Corp_Tax_Credit_data/industry_cleaned.csv')
Rows: 2476 Columns: 6── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Name, Group
dbl (4): Year, Num, Amount, Avg
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Drop the total credit amount so it is not used as a predictor of Avg.
# select() is namespace-qualified because library(MASS) is attached after
# tidyverse and MASS::select would otherwise mask dplyr::select.
industry_cleaned <- industry_cleaned %>% dplyr::select(-Amount)
Creating the Models
# Fit a linear model and print/plot a set of regression assumption checks.
#
# Args:
#   df:          data frame holding the response and every predictor used by
#                `sat.formula`.
#   field:       name (string) of the response column in `df`.
#   sat.formula: model formula handed to lm().
#
# Side effects: prints the Shapiro-Wilk test, the kurtosis, the model summary,
#   a histogram of `field` and the (G)VIF table; draws the lm diagnostic plots
#   and the influence plot.
# Returns: the value of car::influencePlot() for the fitted model.
sat.model.summary <- function (df, field, sat.formula){
  #Shapiro-Wilks test to evaluate normality
  print(shapiro.test(df[[field]]))

  #Kurtosis evaluation (normal distribution has a value close to 3).
  #Namespace-qualified so a same-named function from another attached package
  #(several report excess kurtosis, where normal is 0) cannot be picked up.
  print('kurtosis')
  print(moments::kurtosis(df[[field]]))

  linear.model.cleaned <- lm(sat.formula, data = df)
  print(summary(linear.model.cleaned))
  plot(linear.model.cleaned)

  #histogram of response variable to check distribution.
  #aes_string() is deprecated; the .data pronoun looks the column up by name.
  #The x label is set explicitly so the axis title stays the column name.
  print(
    ggplot(df, aes(x = .data[[field]])) +
      geom_histogram() +
      labs(title = 'Average Credit Amount Distribution', x = field) +
      theme(plot.title = element_text(hjust = 0.5))
  )

  #Checking multicollinearity using VIF measurement
  print(car::vif(linear.model.cleaned))

  car::influencePlot(linear.model.cleaned)
  #avPlots(linear.model.cleaned)
}
# Response column and formula shared by both untransformed models:
# Avg regressed on every other column of the data frame.
sat.field <- 'Avg'
sat.formula <- Avg ~ .

# Assumption checks for the income-group data.
sat.model.summary(df = income_cleaned, field = sat.field, sat.formula = sat.formula)
Shapiro-Wilk normality test
data: df[[field]]
W = 0.17297, p-value < 2.2e-16
[1] "kurtosis"
[1] 169.7518
Call:
lm(formula = sat.formula, data = df)
Residuals:
Min 1Q Median 3Q Max
-11338570 -547702 94139 421302 87181761
Coefficients:
Estimate Std. Error
(Intercept) -1.157e+08 5.512e+07
Year 5.721e+04 2.731e+04
NameAlternative Fuels and Electric Vehicle Recharging Property Credit -3.287e+05 1.242e+06
NameAlternative Minimum Tax Credit 6.538e+05 9.757e+05
NameBeer Production Credit 3.316e+05 1.290e+06
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15 1.429e+06 9.960e+05
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 4.098e+06 1.421e+06
NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08 1.182e+06 9.927e+05
NameBrownfield Tax Credits - Remediation Real Property Tax Credit -7.587e+04 9.920e+05
NameClean Heating Fuel Credit 7.148e+04 1.024e+06
NameConservation Easement Tax Credit 1.143e+05 1.064e+06
NameCredit for Employment of Persons with Disabilities -9.343e+05 1.119e+06
NameCredit for Purchase of an Automated External Defibrillator -1.449e+05 9.695e+05
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities 2.316e+05 1.779e+06
NameEmpire State Apprentice Tax Credit -7.983e+05 1.524e+06
NameEmpire State Commercial Production Credit 2.183e+05 1.291e+06
NameEmpire State Film Post Production Credit 2.521e+04 1.049e+06
NameEmpire State Film Production Credit 1.145e+07 9.967e+05
NameEmpire State Musical and Theatrical Production Credit -2.342e+05 1.775e+06
NameExcelsior Jobs Program Credit 1.035e+05 9.545e+05
NameEZ/QEZE Tax Credits - EZ Investment Tax Credit 2.752e+06 8.862e+05
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit 4.845e+05 9.213e+05
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes 1.245e+06 9.239e+05
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners -1.025e+05 9.549e+05
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit -6.145e+04 9.387e+05
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners 9.119e+04 1.044e+06
NameFarm Workforce Retention Credit -7.802e+03 1.285e+06
NameFarmers' School Tax Credit 1.440e+05 1.021e+06
NameHire a Veteran Credit -1.238e+06 1.782e+06
NameHistoric Properties Rehabilitation Credit 1.352e+06 1.055e+06
NameIndustrial or Manufacturing Business Tax Credit 4.997e+05 1.074e+06
NameInvestment Tax Credit 4.463e+05 8.895e+05
NameInvestment Tax Credit for the Financial Services Industry 3.976e+05 1.008e+06
NameLife Sciences Research & Development Tax Credit -8.207e+04 2.099e+06
NameLong-Term Care Insurance Credit 1.246e+05 9.808e+05
NameLow-Income Housing Credit -2.764e+04 1.113e+06
NameMinimum Wage Reimbursement Credit -2.931e+05 1.006e+06
NameMortgage Servicing Tax Credit -4.540e+05 1.085e+06
NameNew York Youth Jobs Program Tax Credit -2.587e+05 9.381e+05
NameQETC Capital Tax Credit 2.616e+05 1.386e+06
NameQETC Employment Credit 1.335e+05 1.026e+06
NameQETC Facilities, Operations, and Training Credit 5.153e+05 1.918e+06
NameReal Property Tax Relief Credit for Manufacturing -2.635e+05 9.654e+05
NameSpecial Additional Mortgage Recording Tax Credit 3.678e+04 9.244e+05
NameSTART-UP NY Tax Elimination Credit 4.061e+03 1.130e+06
Group1,000,000 - 24,999,999 2.170e+05 3.501e+05
Group100,000 - 499,999 1.352e+05 3.579e+05
Group100,000,000 - 499,999,999 9.644e+05 3.937e+05
Group25,000,000 - 49,999,999 4.519e+05 4.223e+05
Group50,000,000 - 99,999,999 4.021e+05 4.235e+05
Group500,000 - 999,999 2.195e+05 3.880e+05
Group500,000,000 - and over 3.073e+06 3.846e+05
GroupZero or Net Loss 9.536e+05 3.339e+05
Num -4.520e+02 8.569e+02
t value Pr(>|t|)
(Intercept) -2.100 0.03590 *
Year 2.095 0.03634 *
NameAlternative Fuels and Electric Vehicle Recharging Property Credit -0.265 0.79140
NameAlternative Minimum Tax Credit 0.670 0.50286
NameBeer Production Credit 0.257 0.79715
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15 1.434 0.15162
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 2.883 0.00398 **
NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08 1.191 0.23386
NameBrownfield Tax Credits - Remediation Real Property Tax Credit -0.076 0.93904
NameClean Heating Fuel Credit 0.070 0.94436
NameConservation Easement Tax Credit 0.107 0.91444
NameCredit for Employment of Persons with Disabilities -0.835 0.40391
NameCredit for Purchase of an Automated External Defibrillator -0.150 0.88117
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities 0.130 0.89645
NameEmpire State Apprentice Tax Credit -0.524 0.60040
NameEmpire State Commercial Production Credit 0.169 0.86576
NameEmpire State Film Post Production Credit 0.024 0.98082
NameEmpire State Film Production Credit 11.485 < 2e-16 ***
NameEmpire State Musical and Theatrical Production Credit -0.132 0.89507
NameExcelsior Jobs Program Credit 0.108 0.91366
NameEZ/QEZE Tax Credits - EZ Investment Tax Credit 3.105 0.00193 **
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit 0.526 0.59899
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes 1.348 0.17785
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners -0.107 0.91455
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit -0.065 0.94781
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners 0.087 0.93038
NameFarm Workforce Retention Credit -0.006 0.99516
NameFarmers' School Tax Credit 0.141 0.88786
NameHire a Veteran Credit -0.695 0.48721
NameHistoric Properties Rehabilitation Credit 1.282 0.20015
NameIndustrial or Manufacturing Business Tax Credit 0.465 0.64173
NameInvestment Tax Credit 0.502 0.61588
NameInvestment Tax Credit for the Financial Services Industry 0.395 0.69325
NameLife Sciences Research & Development Tax Credit -0.039 0.96882
NameLong-Term Care Insurance Credit 0.127 0.89892
NameLow-Income Housing Credit -0.025 0.98020
NameMinimum Wage Reimbursement Credit -0.291 0.77075
NameMortgage Servicing Tax Credit -0.418 0.67565
NameNew York Youth Jobs Program Tax Credit -0.276 0.78279
NameQETC Capital Tax Credit 0.189 0.85032
NameQETC Employment Credit 0.130 0.89641
NameQETC Facilities, Operations, and Training Credit 0.269 0.78820
NameReal Property Tax Relief Credit for Manufacturing -0.273 0.78490
NameSpecial Additional Mortgage Recording Tax Credit 0.040 0.96827
NameSTART-UP NY Tax Elimination Credit 0.004 0.99713
Group1,000,000 - 24,999,999 0.620 0.53548
Group100,000 - 499,999 0.378 0.70572
Group100,000,000 - 499,999,999 2.449 0.01441 *
Group25,000,000 - 49,999,999 1.070 0.28474
Group50,000,000 - 99,999,999 0.950 0.34247
Group500,000 - 999,999 0.566 0.57170
Group500,000,000 - and over 7.991 2.33e-15 ***
GroupZero or Net Loss 2.856 0.00433 **
Num -0.527 0.59791
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3868000 on 1867 degrees of freedom
Multiple R-squared: 0.2361, Adjusted R-squared: 0.2144
F-statistic: 10.88 on 53 and 1867 DF, p-value: < 2.2e-16
GVIF Df GVIF^(1/(2*Df))
Year 2.125869 1 1.458036
Name 3.521453 43 1.014746
Group 1.510763 8 1.026124
Num 1.645484 1 1.282764
# Keep the untransformed income fit; it is reused for the Box-Cox search and
# the BIC comparison further down.
income.model <- lm(formula = sat.formula, data = income_cleaned)

# Assumption checks for the industry-group data.
sat.model.summary(df = industry_cleaned, field = sat.field, sat.formula = sat.formula)
Shapiro-Wilk normality test
data: df[[field]]
W = 0.22287, p-value < 2.2e-16
[1] "kurtosis"
[1] 135.5482
Call:
lm(formula = sat.formula, data = df)
Residuals:
Min 1Q Median 3Q Max
-11581741 -371031 -23164 154182 28917959
Coefficients:
Estimate Std. Error
(Intercept) -3.926e+07 2.051e+07
Year 1.936e+04 1.016e+04
NameAlternative Fuels and Electric Vehicle Recharging Property Credit 5.584e+04 5.062e+05
NameAlternative Minimum Tax Credit 3.523e+05 4.208e+05
NameBeer Production Credit 8.799e+04 6.261e+05
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15 1.919e+06 4.473e+05
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 4.064e+06 7.278e+05
NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08 1.054e+06 4.779e+05
NameBrownfield Tax Credits - Remediation Real Property Tax Credit 1.327e+05 4.760e+05
NameClean Heating Fuel Credit 2.785e+05 4.595e+05
NameConservation Easement Tax Credit 2.743e+04 4.700e+05
NameCredit for Employment of Persons with Disabilities 1.574e+05 5.069e+05
NameCredit for Purchase of an Automated External Defibrillator 1.082e+05 4.465e+05
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities 2.090e+05 8.286e+05
NameEmpire State Apprentice Tax Credit -3.324e+05 1.012e+06
NameEmpire State Commercial Production Credit 3.524e+05 6.172e+05
NameEmpire State Film Post Production Credit 5.332e+05 5.224e+05
NameEmpire State Film Production Credit 1.170e+07 4.785e+05
NameEmpire State Musical and Theatrical Production Credit 7.676e+04 9.010e+05
NameExcelsior Jobs Program Credit 7.086e+05 4.393e+05
NameEZ/QEZE Tax Credits - EZ Investment Tax Credit 1.381e+06 4.318e+05
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit 3.715e+05 4.213e+05
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes 1.160e+06 4.192e+05
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners 3.634e+05 4.311e+05
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit 2.567e+05 4.265e+05
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners 7.379e+04 5.041e+05
NameFarm Workforce Retention Credit 2.727e+04 5.352e+05
NameFarmers' School Tax Credit 1.287e+05 5.133e+05
NameHire a Veteran Credit 1.459e+05 8.253e+05
NameHistoric Properties Rehabilitation Credit 1.875e+06 4.841e+05
NameInvestment Tax Credit 7.182e+05 4.129e+05
NameInvestment Tax Credit for the Financial Services Industry 6.339e+05 5.757e+05
NameLife Sciences Research & Development Tax Credit -2.243e+03 9.007e+05
NameLong-Term Care Insurance Credit 1.385e+05 4.200e+05
NameLow-Income Housing Credit 1.642e+06 5.494e+05
NameMinimum Wage Reimbursement Credit 9.624e+04 4.381e+05
NameMortgage Servicing Tax Credit 2.077e+05 6.462e+05
NameNew York Youth Jobs Program Tax Credit 1.953e+05 4.273e+05
NameQETC Capital Tax Credit 2.986e+05 5.868e+05
NameQETC Employment Credit 8.982e+04 4.465e+05
NameQETC Facilities, Operations, and Training Credit 3.477e+05 6.711e+05
NameReal Property Tax Relief Credit for Manufacturing 1.520e+05 4.423e+05
NameSpecial Additional Mortgage Recording Tax Credit 2.609e+05 4.432e+05
NameSTART-UP NY Tax Elimination Credit 1.703e+04 4.750e+05
GroupAdministrative and Support and Waste Management and Remediation Services -2.280e+03 2.519e+05
GroupAdministrative/Support/Waste Management/Remediation Services -3.173e+03 2.798e+05
GroupAgriculture, Forestry, Fishing and Hunting 7.269e+04 2.330e+05
GroupArts, Entertainment, and Recreation 5.866e+05 2.317e+05
GroupConstruction -1.699e+04 2.171e+05
GroupEducational Services 5.569e+04 2.948e+05
GroupFinance and Insurance 2.139e+05 2.034e+05
GroupHealth Care and Social Assistance 1.300e+04 2.290e+05
GroupInformation -2.433e+05 2.178e+05
GroupManagement of Companies and Enterprises 3.355e+05 1.949e+05
GroupManufacturing 6.786e+05 2.028e+05
GroupMining -1.561e+04 3.593e+05
GroupMining, Quarrying, and Oil and Gas Extraction 1.084e+05 3.011e+05
GroupOther Services (except Public Administration) -6.462e+04 2.217e+05
GroupProfessional, Scientific, and Technical Services 3.736e+05 2.085e+05
GroupReal Estate and Rental and Leasing 9.506e+04 2.044e+05
GroupRetail Trade 4.746e+04 2.036e+05
GroupTransportation and Warehousing -1.866e+04 2.300e+05
GroupUtilities 5.308e+05 2.633e+05
GroupWholesale Trade 6.106e+04 2.081e+05
Num -9.657e+02 4.272e+02
t value Pr(>|t|)
(Intercept) -1.914 0.055677 .
Year 1.905 0.056959 .
NameAlternative Fuels and Electric Vehicle Recharging Property Credit 0.110 0.912180
NameAlternative Minimum Tax Credit 0.837 0.402556
NameBeer Production Credit 0.141 0.888249
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15 4.290 1.86e-05 ***
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 5.584 2.62e-08 ***
NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08 2.206 0.027462 *
NameBrownfield Tax Credits - Remediation Real Property Tax Credit 0.279 0.780429
NameClean Heating Fuel Credit 0.606 0.544549
NameConservation Easement Tax Credit 0.058 0.953457
NameCredit for Employment of Persons with Disabilities 0.311 0.756142
NameCredit for Purchase of an Automated External Defibrillator 0.242 0.808639
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities 0.252 0.800866
NameEmpire State Apprentice Tax Credit -0.329 0.742537
NameEmpire State Commercial Production Credit 0.571 0.568123
NameEmpire State Film Post Production Credit 1.021 0.307585
NameEmpire State Film Production Credit 24.458 < 2e-16 ***
NameEmpire State Musical and Theatrical Production Credit 0.085 0.932112
NameExcelsior Jobs Program Credit 1.613 0.106849
NameEZ/QEZE Tax Credits - EZ Investment Tax Credit 3.199 0.001399 **
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit 0.882 0.377942
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes 2.767 0.005700 **
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners 0.843 0.399359
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit 0.602 0.547350
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners 0.146 0.883634
NameFarm Workforce Retention Credit 0.051 0.959361
NameFarmers' School Tax Credit 0.251 0.802102
NameHire a Veteran Credit 0.177 0.859709
NameHistoric Properties Rehabilitation Credit 3.874 0.000110 ***
NameInvestment Tax Credit 1.739 0.082075 .
NameInvestment Tax Credit for the Financial Services Industry 1.101 0.270913
NameLife Sciences Research & Development Tax Credit -0.002 0.998014
NameLong-Term Care Insurance Credit 0.330 0.741537
NameLow-Income Housing Credit 2.988 0.002833 **
NameMinimum Wage Reimbursement Credit 0.220 0.826159
NameMortgage Servicing Tax Credit 0.321 0.747934
NameNew York Youth Jobs Program Tax Credit 0.457 0.647604
NameQETC Capital Tax Credit 0.509 0.610823
NameQETC Employment Credit 0.201 0.840607
NameQETC Facilities, Operations, and Training Credit 0.518 0.604441
NameReal Property Tax Relief Credit for Manufacturing 0.344 0.731171
NameSpecial Additional Mortgage Recording Tax Credit 0.589 0.556155
NameSTART-UP NY Tax Elimination Credit 0.036 0.971411
GroupAdministrative and Support and Waste Management and Remediation Services -0.009 0.992777
GroupAdministrative/Support/Waste Management/Remediation Services -0.011 0.990952
GroupAgriculture, Forestry, Fishing and Hunting 0.312 0.755097
GroupArts, Entertainment, and Recreation 2.532 0.011408 *
GroupConstruction -0.078 0.937621
GroupEducational Services 0.189 0.850176
GroupFinance and Insurance 1.051 0.293237
GroupHealth Care and Social Assistance 0.057 0.954731
GroupInformation -1.117 0.263917
GroupManagement of Companies and Enterprises 1.721 0.085332 .
GroupManufacturing 3.346 0.000831 ***
GroupMining -0.043 0.965352
GroupMining, Quarrying, and Oil and Gas Extraction 0.360 0.718901
GroupOther Services (except Public Administration) -0.291 0.770697
GroupProfessional, Scientific, and Technical Services 1.792 0.073214 .
GroupReal Estate and Rental and Leasing 0.465 0.641882
GroupRetail Trade 0.233 0.815756
GroupTransportation and Warehousing -0.081 0.935321
GroupUtilities 2.016 0.043917 *
GroupWholesale Trade 0.293 0.769261
Num -2.260 0.023889 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1612000 on 2411 degrees of freedom
Multiple R-squared: 0.4674, Adjusted R-squared: 0.4533
F-statistic: 33.06 on 64 and 2411 DF, p-value: < 2.2e-16
GVIF Df GVIF^(1/(2*Df))
Year 2.190806 1 1.480137
Name 5.185764 42 1.019787
Group 2.724217 20 1.025371
Num 1.370920 1 1.170863
# Keep the untransformed industry fit; it is reused for the Box-Cox search and
# the BIC comparison further down.
industry.model <- lm(formula = sat.formula, data = industry_cleaned)
Correcting the violation of normality in the previous models with a Box-Cox transform
# Find the Box-Cox lambda that maximises the profile log-likelihood.
#
# Args:
#   lm.cleaned:   fitted lm object whose response is to be transformed.
#   lambda.range: numeric vector of lambda values passed to car::boxCox().
#
# Side effects: car::boxCox() draws the log-likelihood profile plot.
# Returns: a single lambda value (the first one, should several tie).
bc_func <- function (lm.cleaned, lambda.range){
  bc <- car::boxCox(lm.cleaned, lambda = lambda.range)
  # which.max() avoids comparing doubles with `==` and always yields exactly
  # one index, so callers never receive a vector of lambdas.
  bc$x[which.max(bc$y)]
}
# Best Box-Cox lambda per dataset, searched over -0.2 to 0.2 in steps of 1/10.

#Income Group Dataset
income.lambda.bc <- bc_func(lm.cleaned = income.model, lambda.range = seq(-0.2, 0.2, 1/10))
income.lambda.bc

#Industry Group Dataset
industry.lambda.bc <- bc_func(lm.cleaned = industry.model, lambda.range = seq(-0.2, 0.2, 1/10))
industry.lambda.bc
# Replace the Avg column of `df` with its Box-Cox transform, Avg.bc.
#
# Args:
#   df:        data frame containing a numeric column named Avg.
#   lambda.bc: single Box-Cox lambda.
#
# Returns: `df` with Avg removed and Avg.bc appended as the last column, where
#   Avg.bc = (Avg^lambda - 1) / lambda, or log(Avg) when lambda is exactly 0
#   (the limit of the formula, which would otherwise evaluate to 0/0 = NaN).
bc_transform <- function(df, lambda.bc){
  stopifnot(
    "Avg" %in% names(df),
    is.numeric(lambda.bc),
    length(lambda.bc) == 1,
    !is.na(lambda.bc)
  )

  avg <- df[["Avg"]]
  # Base R assignment is used instead of select() so the function does not
  # depend on whether MASS::select is masking dplyr::select.
  df[["Avg.bc"]] <- if (lambda.bc == 0) {
    log(avg)
  } else {
    (avg^lambda.bc - 1) / lambda.bc
  }
  df[["Avg"]] <- NULL  # took out the untransformed response Avg
  df
}
# Apply each dataset's lambda, then refit on the transformed response.

#Income Group Dataset
income_cleaned_bc <- bc_transform(df = income_cleaned, lambda.bc = income.lambda.bc)
income.model.bc <- lm(formula = Avg.bc ~ ., data = income_cleaned_bc)

#Industry Group Dataset
industry_cleaned_bc <- bc_transform(df = industry_cleaned, lambda.bc = industry.lambda.bc)
industry.model.bc <- lm(formula = Avg.bc ~ ., data = industry_cleaned_bc)
Checking linear regression assumptions for the transformed data.
# Response column and formula for the Box-Cox transformed data.
sat.field.bc <- 'Avg.bc'
sat.formula.bc <- Avg.bc ~ .

#Income
sat.model.summary(df = income_cleaned_bc, field = sat.field.bc, sat.formula = sat.formula.bc)
Shapiro-Wilk normality test
data: df[[field]]
W = 0.99696, p-value = 0.000782
[1] "kurtosis"
[1] -0.2553313
Call:
lm(formula = sat.formula, data = df)
Residuals:
Min 1Q Median 3Q Max
-6.6597 -0.5738 -0.0113 0.5668 4.4826
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.634e+01 1.456e+01 -2.496 0.012643
Year 2.288e-02 7.214e-03 3.171 0.001543
NameAlternative Fuels and Electric Vehicle Recharging Property Credit -8.643e-01 3.281e-01 -2.634 0.008510
NameAlternative Minimum Tax Credit -2.052e+00 2.577e-01 -7.962 2.91e-15
NameBeer Production Credit 5.687e-01 3.407e-01 1.669 0.095234
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15 1.715e+00 2.630e-01 6.521 8.99e-11
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 2.676e+00 3.754e-01 7.130 1.43e-12
NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08 1.331e+00 2.622e-01 5.076 4.24e-07
NameBrownfield Tax Credits - Remediation Real Property Tax Credit 5.458e-02 2.620e-01 0.208 0.834991
NameClean Heating Fuel Credit -3.086e+00 2.705e-01 -11.409 < 2e-16
NameConservation Easement Tax Credit -2.137e+00 2.810e-01 -7.606 4.45e-14
NameCredit for Employment of Persons with Disabilities -3.118e+00 2.956e-01 -10.547 < 2e-16
NameCredit for Purchase of an Automated External Defibrillator -2.938e+00 2.560e-01 -11.473 < 2e-16
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities -5.673e-01 4.698e-01 -1.207 0.227430
NameEmpire State Apprentice Tax Credit -2.207e+00 4.024e-01 -5.485 4.71e-08
NameEmpire State Commercial Production Credit 6.075e-02 3.410e-01 0.178 0.858634
NameEmpire State Film Post Production Credit 1.101e+00 2.769e-01 3.977 7.25e-05
NameEmpire State Film Production Credit 2.853e+00 2.632e-01 10.838 < 2e-16
NameEmpire State Musical and Theatrical Production Credit 2.508e-01 4.688e-01 0.535 0.592803
NameExcelsior Jobs Program Credit 4.651e-01 2.521e-01 1.845 0.065178
NameEZ/QEZE Tax Credits - EZ Investment Tax Credit 8.876e-01 2.341e-01 3.792 0.000154
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit 1.775e-01 2.433e-01 0.730 0.465701
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes 1.220e+00 2.440e-01 5.000 6.26e-07
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners 1.182e-01 2.522e-01 0.469 0.639260
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit -8.497e-01 2.479e-01 -3.427 0.000623
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners -1.358e+00 2.756e-01 -4.928 9.02e-07
NameFarm Workforce Retention Credit -1.612e+00 3.394e-01 -4.749 2.20e-06
NameFarmers' School Tax Credit -1.311e+00 2.696e-01 -4.863 1.26e-06
NameHire a Veteran Credit -2.915e+00 4.706e-01 -6.195 7.15e-10
NameHistoric Properties Rehabilitation Credit 1.918e+00 2.786e-01 6.886 7.81e-12
NameIndustrial or Manufacturing Business Tax Credit -1.718e+00 2.836e-01 -6.059 1.66e-09
NameInvestment Tax Credit 7.147e-02 2.349e-01 0.304 0.760985
NameInvestment Tax Credit for the Financial Services Industry 3.176e-01 2.662e-01 1.193 0.232870
NameLife Sciences Research & Development Tax Credit 7.033e-01 5.543e-01 1.269 0.204728
NameLong-Term Care Insurance Credit -2.863e+00 2.590e-01 -11.051 < 2e-16
NameLow-Income Housing Credit -9.063e-01 2.940e-01 -3.083 0.002080
NameMinimum Wage Reimbursement Credit -1.241e+00 2.656e-01 -4.674 3.17e-06
NameMortgage Servicing Tax Credit -9.378e-01 2.865e-01 -3.273 0.001084
NameNew York Youth Jobs Program Tax Credit -1.330e+00 2.478e-01 -5.367 9.01e-08
NameQETC Capital Tax Credit 4.561e-01 3.661e-01 1.246 0.212956
NameQETC Employment Credit -5.888e-01 2.709e-01 -2.174 0.029855
NameQETC Facilities, Operations, and Training Credit 5.799e-01 5.065e-01 1.145 0.252462
NameReal Property Tax Relief Credit for Manufacturing -1.518e+00 2.550e-01 -5.956 3.09e-09
NameSpecial Additional Mortgage Recording Tax Credit -2.309e-01 2.441e-01 -0.946 0.344374
NameSTART-UP NY Tax Elimination Credit -2.193e+00 2.985e-01 -7.345 3.05e-13
Group1,000,000 - 24,999,999 1.090e+00 9.246e-02 11.785 < 2e-16
Group100,000 - 499,999 3.590e-01 9.451e-02 3.798 0.000150
Group100,000,000 - 499,999,999 1.685e+00 1.040e-01 16.202 < 2e-16
Group25,000,000 - 49,999,999 1.325e+00 1.115e-01 11.883 < 2e-16
Group50,000,000 - 99,999,999 1.473e+00 1.118e-01 13.172 < 2e-16
Group500,000 - 999,999 6.032e-01 1.025e-01 5.886 4.67e-09
Group500,000,000 - and over 2.332e+00 1.016e-01 22.953 < 2e-16
GroupZero or Net Loss 9.934e-01 8.817e-02 11.267 < 2e-16
Num -1.533e-03 2.263e-04 -6.775 1.66e-11
(Intercept) *
Year **
NameAlternative Fuels and Electric Vehicle Recharging Property Credit **
NameAlternative Minimum Tax Credit ***
NameBeer Production Credit .
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15 ***
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 ***
NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08 ***
NameBrownfield Tax Credits - Remediation Real Property Tax Credit
NameClean Heating Fuel Credit ***
NameConservation Easement Tax Credit ***
NameCredit for Employment of Persons with Disabilities ***
NameCredit for Purchase of an Automated External Defibrillator ***
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities
NameEmpire State Apprentice Tax Credit ***
NameEmpire State Commercial Production Credit
NameEmpire State Film Post Production Credit ***
NameEmpire State Film Production Credit ***
NameEmpire State Musical and Theatrical Production Credit
NameExcelsior Jobs Program Credit .
NameEZ/QEZE Tax Credits - EZ Investment Tax Credit ***
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes ***
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit ***
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners ***
NameFarm Workforce Retention Credit ***
NameFarmers' School Tax Credit ***
NameHire a Veteran Credit ***
NameHistoric Properties Rehabilitation Credit ***
NameIndustrial or Manufacturing Business Tax Credit ***
NameInvestment Tax Credit
NameInvestment Tax Credit for the Financial Services Industry
NameLife Sciences Research & Development Tax Credit
NameLong-Term Care Insurance Credit ***
NameLow-Income Housing Credit **
NameMinimum Wage Reimbursement Credit ***
NameMortgage Servicing Tax Credit **
NameNew York Youth Jobs Program Tax Credit ***
NameQETC Capital Tax Credit
NameQETC Employment Credit *
NameQETC Facilities, Operations, and Training Credit
NameReal Property Tax Relief Credit for Manufacturing ***
NameSpecial Additional Mortgage Recording Tax Credit
NameSTART-UP NY Tax Elimination Credit ***
Group1,000,000 - 24,999,999 ***
Group100,000 - 499,999 ***
Group100,000,000 - 499,999,999 ***
Group25,000,000 - 49,999,999 ***
Group50,000,000 - 99,999,999 ***
Group500,000 - 999,999 ***
Group500,000,000 - and over ***
GroupZero or Net Loss ***
Num ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.022 on 1867 degrees of freedom
Multiple R-squared: 0.7368, Adjusted R-squared: 0.7293
F-statistic: 98.61 on 53 and 1867 DF, p-value: < 2.2e-16
GVIF Df GVIF^(1/(2*Df))
Year 2.125869 1 1.458036
Name 3.521453 43 1.014746
Group 1.510763 8 1.026124
Num 1.645484 1 1.282764
#Industry
sat.model.summary(df = industry_cleaned_bc, field = sat.field.bc, sat.formula = sat.formula.bc)
Shapiro-Wilk normality test
data: df[[field]]
W = 0.9902, p-value = 5.826e-12
[1] "kurtosis"
[1] -0.4869026
Call:
lm(formula = sat.formula, data = df)
Residuals:
Min 1Q Median 3Q Max
-6.2888 -0.4697 0.0183 0.4928 3.9068
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -6.095e+01 1.107e+01 -5.505 4.09e-08
Year 3.440e-02 5.488e-03 6.269 4.30e-10
NameAlternative Fuels and Electric Vehicle Recharging Property Credit 2.665e-01 2.733e-01 0.975 0.329737
NameAlternative Minimum Tax Credit -2.463e+00 2.272e-01 -10.839 < 2e-16
NameBeer Production Credit 6.334e-01 3.381e-01 1.873 0.061126
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15 2.198e+00 2.415e-01 9.100 < 2e-16
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 2.727e+00 3.930e-01 6.939 5.06e-12
NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08 1.586e+00 2.581e-01 6.148 9.17e-10
NameBrownfield Tax Credits - Remediation Real Property Tax Credit 9.695e-01 2.570e-01 3.772 0.000166
NameClean Heating Fuel Credit -2.544e+00 2.481e-01 -10.252 < 2e-16
NameConservation Easement Tax Credit -1.123e+00 2.538e-01 -4.423 1.02e-05
NameCredit for Employment of Persons with Disabilities -1.245e+00 2.737e-01 -4.549 5.67e-06
NameCredit for Purchase of an Automated External Defibrillator -1.535e+00 2.411e-01 -6.368 2.29e-10
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities -8.171e-02 4.474e-01 -0.183 0.855119
NameEmpire State Apprentice Tax Credit -8.683e-01 5.464e-01 -1.589 0.112125
NameEmpire State Commercial Production Credit 6.235e-01 3.333e-01 1.871 0.061481
NameEmpire State Film Post Production Credit 1.439e+00 2.821e-01 5.100 3.66e-07
NameEmpire State Film Production Credit 3.232e+00 2.584e-01 12.509 < 2e-16
NameEmpire State Musical and Theatrical Production Credit 1.012e+00 4.865e-01 2.080 0.037608
NameExcelsior Jobs Program Credit 1.643e+00 2.372e-01 6.929 5.42e-12
NameEZ/QEZE Tax Credits - EZ Investment Tax Credit 1.310e+00 2.332e-01 5.617 2.17e-08
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit 7.127e-01 2.275e-01 3.133 0.001750
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes 1.707e+00 2.264e-01 7.540 6.61e-14
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners 9.777e-01 2.328e-01 4.200 2.77e-05
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit 1.609e-01 2.303e-01 0.699 0.484921
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners -5.241e-01 2.722e-01 -1.925 0.054320
NameFarm Workforce Retention Credit -8.623e-01 2.890e-01 -2.984 0.002876
NameFarmers' School Tax Credit -1.158e+00 2.771e-01 -4.179 3.03e-05
NameHire a Veteran Credit -1.040e+00 4.456e-01 -2.335 0.019638
NameHistoric Properties Rehabilitation Credit 2.397e+00 2.614e-01 9.172 < 2e-16
NameInvestment Tax Credit 6.981e-01 2.230e-01 3.131 0.001763
NameInvestment Tax Credit for the Financial Services Industry 1.560e+00 3.108e-01 5.019 5.59e-07
NameLife Sciences Research & Development Tax Credit 9.057e-01 4.864e-01 1.862 0.062697
NameLong-Term Care Insurance Credit -1.867e+00 2.268e-01 -8.231 3.01e-16
NameLow-Income Housing Credit 1.331e+00 2.967e-01 4.487 7.57e-06
NameMinimum Wage Reimbursement Credit -9.161e-01 2.366e-01 -3.872 0.000111
NameMortgage Servicing Tax Credit 8.932e-01 3.489e-01 2.560 0.010534
NameNew York Youth Jobs Program Tax Credit 1.263e-01 2.307e-01 0.548 0.584069
NameQETC Capital Tax Credit 1.162e+00 3.168e-01 3.667 0.000251
NameQETC Employment Credit -3.048e-01 2.411e-01 -1.264 0.206249
NameQETC Facilities, Operations, and Training Credit 1.147e+00 3.624e-01 3.165 0.001571
NameReal Property Tax Relief Credit for Manufacturing -6.559e-01 2.388e-01 -2.746 0.006077
NameSpecial Additional Mortgage Recording Tax Credit 7.505e-01 2.393e-01 3.136 0.001736
NameSTART-UP NY Tax Elimination Credit -1.834e+00 2.565e-01 -7.151 1.14e-12
GroupAdministrative and Support and Waste Management and Remediation Services 2.878e-01 1.360e-01 2.116 0.034417
GroupAdministrative/Support/Waste Management/Remediation Services 1.675e-01 1.511e-01 1.109 0.267612
GroupAgriculture, Forestry, Fishing and Hunting -1.132e-01 1.258e-01 -0.899 0.368486
GroupArts, Entertainment, and Recreation 5.595e-01 1.251e-01 4.473 8.09e-06
GroupConstruction -4.299e-02 1.172e-01 -0.367 0.713860
GroupEducational Services 2.253e-01 1.592e-01 1.415 0.157160
GroupFinance and Insurance 5.350e-01 1.099e-01 4.870 1.19e-06
GroupHealth Care and Social Assistance -6.777e-02 1.237e-01 -0.548 0.583720
GroupInformation 7.098e-01 1.176e-01 6.037 1.81e-09
GroupManagement of Companies and Enterprises 6.672e-01 1.052e-01 6.340 2.73e-10
GroupManufacturing 4.608e-01 1.095e-01 4.208 2.67e-05
GroupMining 2.515e-01 1.940e-01 1.296 0.194977
GroupMining, Quarrying, and Oil and Gas Extraction 3.869e-01 1.626e-01 2.379 0.017421
GroupOther Services (except Public Administration) -1.550e-01 1.197e-01 -1.295 0.195482
GroupProfessional, Scientific, and Technical Services 4.875e-01 1.126e-01 4.331 1.55e-05
GroupReal Estate and Rental and Leasing 1.461e-01 1.104e-01 1.324 0.185596
GroupRetail Trade 3.754e-01 1.100e-01 3.414 0.000650
GroupTransportation and Warehousing 2.125e-01 1.242e-01 1.711 0.087179
GroupUtilities 6.972e-01 1.422e-01 4.904 1.00e-06
GroupWholesale Trade 4.367e-01 1.124e-01 3.886 0.000105
Num 4.542e-04 2.307e-04 1.969 0.049082
(Intercept) ***
Year ***
NameAlternative Fuels and Electric Vehicle Recharging Property Credit
NameAlternative Minimum Tax Credit ***
NameBeer Production Credit .
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15 ***
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 ***
NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08 ***
NameBrownfield Tax Credits - Remediation Real Property Tax Credit ***
NameClean Heating Fuel Credit ***
NameConservation Easement Tax Credit ***
NameCredit for Employment of Persons with Disabilities ***
NameCredit for Purchase of an Automated External Defibrillator ***
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities
NameEmpire State Apprentice Tax Credit
NameEmpire State Commercial Production Credit .
NameEmpire State Film Post Production Credit ***
NameEmpire State Film Production Credit ***
NameEmpire State Musical and Theatrical Production Credit *
NameExcelsior Jobs Program Credit ***
NameEZ/QEZE Tax Credits - EZ Investment Tax Credit ***
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit **
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes ***
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners ***
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners .
NameFarm Workforce Retention Credit **
NameFarmers' School Tax Credit ***
NameHire a Veteran Credit *
NameHistoric Properties Rehabilitation Credit ***
NameInvestment Tax Credit **
NameInvestment Tax Credit for the Financial Services Industry ***
NameLife Sciences Research & Development Tax Credit .
NameLong-Term Care Insurance Credit ***
NameLow-Income Housing Credit ***
NameMinimum Wage Reimbursement Credit ***
NameMortgage Servicing Tax Credit *
NameNew York Youth Jobs Program Tax Credit
NameQETC Capital Tax Credit ***
NameQETC Employment Credit
NameQETC Facilities, Operations, and Training Credit **
NameReal Property Tax Relief Credit for Manufacturing **
NameSpecial Additional Mortgage Recording Tax Credit **
NameSTART-UP NY Tax Elimination Credit ***
GroupAdministrative and Support and Waste Management and Remediation Services *
GroupAdministrative/Support/Waste Management/Remediation Services
GroupAgriculture, Forestry, Fishing and Hunting
GroupArts, Entertainment, and Recreation ***
GroupConstruction
GroupEducational Services
GroupFinance and Insurance ***
GroupHealth Care and Social Assistance
GroupInformation ***
GroupManagement of Companies and Enterprises ***
GroupManufacturing ***
GroupMining
GroupMining, Quarrying, and Oil and Gas Extraction *
GroupOther Services (except Public Administration)
GroupProfessional, Scientific, and Technical Services ***
GroupReal Estate and Rental and Leasing
GroupRetail Trade ***
GroupTransportation and Warehousing .
GroupUtilities ***
GroupWholesale Trade ***
Num *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.8703 on 2411 degrees of freedom
Multiple R-squared: 0.7631, Adjusted R-squared: 0.7569
F-statistic: 121.4 on 64 and 2411 DF, p-value: < 2.2e-16
GVIF Df GVIF^(1/(2*Df))
Year 2.190806 1 1.480137
Name 5.185764 42 1.019787
Group 2.724217 20 1.025371
Num 1.370920 1 1.170863
BIC comparison before and after BoxCox transform
# Print the BIC of each Box-Cox model next to its untransformed counterpart.
# NOTE(review): within each call the two models have different responses
# (Avg.bc vs Avg), so their likelihoods are on different scales -- confirm
# whether a Jacobian adjustment is needed before comparing these values.
BIC(income.model.bc, income.model)
BIC(industry.model.bc, industry.model)
Stepwise Regression on the Box-Cox Transformed Datasets (Income and Industry)
#creating dummy variable columns for stepwise
# Expand every predictor of `df` into numeric design-matrix (dummy) columns and
# return them together with the response as a data frame whose column names
# have had punctuation and spaces replaced by underscores.
#
# Args:
#   df: data frame with a response column `Avg.bc`; all other columns are used
#       as predictors (formula `Avg.bc ~ .`).
# Returns:
#   A data frame with one column per non-intercept design-matrix column,
#   followed by the `Avg.bc` column copied from `df`.
dummy_func <- function(df) {
  # Remove the intercept column. drop = FALSE keeps a single remaining column
  # as a matrix, so its name is preserved by as.data.frame() instead of being
  # replaced by "x".
  x <- model.matrix(Avg.bc ~ ., df)[, -1, drop = FALSE]
  dummy_bc <- as.data.frame(x)
  dummy_bc$Avg.bc <- df$Avg.bc
  #colnames(dummy_bc) <- str_replace_all(colnames(dummy_bc), "-|'|/| |,|�|&" , '_')
  # Replace every character listed in the class below with "_".
  # NOTE(review): the class contains a character that renders as "�"; it looks
  # like a mis-encoded character (possibly the \u0092 apostrophe mentioned in
  # the commented-out column renames in this file) -- confirm the intended one.
  colnames(dummy_bc) <- str_replace_all(colnames(dummy_bc), "[-'/ ,�&()`]", "_")
  dummy_bc
}
Cleaning column names further so stepwise regression doesn’t present any errors
# Apply dummy_func() to each data set and show the resulting column names.
# Income group data
income.dummy.bc <- dummy_func(income_cleaned_bc)
#colnames(income.dummy.bc)[37] <- 'NameManufactureru0092s_Real_Property_Tax_Credit'
names(income.dummy.bc)
# Industry group data
industry.dummy.bc <- dummy_func(industry_cleaned_bc)
#colnames(industry.dummy.bc)[35] <- 'NameManufactureru0092s_Real_Property_Tax_Credit'
names(industry.dummy.bc)
Stepwise regression using BIC as the criterion (k = log(n)).
#Creating Stepwise Models
# For each data set, fit the full model (all columns) and the intercept-only
# model, then run forward and backward stepwise selection between them with
# penalty k = log(number of rows), i.e. BIC instead of step()'s default AIC.
bcs <- list(income = income.dummy.bc, industry = industry.dummy.bc)
model.fulls <- list(
  income = lm(Avg.bc ~ ., data = income.dummy.bc),
  industry = lm(Avg.bc ~ ., data = industry.dummy.bc)
) #All variables
model.emptys <- list(
  income = lm(Avg.bc ~ 1, data = income.dummy.bc),
  industry = lm(Avg.bc ~ 1, data = industry.dummy.bc)
) #intercept only
k <- c("income", "industry")
forwardBIC <- list(income = NULL, industry = NULL)
backwardBIC <- list(income = NULL, industry = NULL)
for (i in k) {
  bc <- bcs[[i]]
  # Search space: from the intercept-only formula up to the full formula.
  scope <- list(
    lower = formula(model.emptys[[i]]),
    upper = formula(model.fulls[[i]])
  )
  # nrow() always yields a plain number. The previous
  # `count() %>% dplyr::first()` returns a one-row tibble on newer dplyr
  # (first() treats a data frame as a vector of rows), which breaks log(n_obs).
  n_obs <- nrow(bc)
  forwardBIC[[i]] <- step(model.emptys[[i]], scope, direction = "forward", k = log(n_obs))
  backwardBIC[[i]] <- step(model.fulls[[i]], scope, direction = "backward", k = log(n_obs))
}
Selecting Best Formula per Dataset from Stepwise Regressions
# Print a short diagnostic report for a fitted linear model: adjusted
# R-squared, number of rows in the coefficient table (intercept included),
# and the largest value returned by vif().
#
# Args:
#   BIC.model: a fitted model accepted by summary() and vif(), e.g. the result
#       of step() on an lm fit.
# Returns:
#   The value of the final print() call (the separator string), invisibly.
bic_func <- function(BIC.model) {
  # Summarise once and reuse; use the full component name `coefficients`
  # rather than relying on partial matching of `$coefficient`.
  model.summary <- summary(BIC.model)
  print('Adjusted R Squared:')
  print(model.summary$adj.r.squared)
  print('Number of Coefficients:')
  print(nrow(model.summary$coefficients))
  print('VIF Check: ')
  print(max(vif(BIC.model)))
  print("*************************")
}
bic_func(forwardBIC[['income']])
[1] "Adjusted R Squared:"
[1] 0.7287541
[1] "Number of Coefficients:"
[1] 41
[1] "VIF Check: "
[1] 1.867767
[1] "*************************"
bic_func(backwardBIC[['income']])
[1] "Adjusted R Squared:"
[1] 0.7287541
[1] "Number of Coefficients:"
[1] 41
[1] "VIF Check: "
[1] 1.867767
[1] "*************************"
bic_func(forwardBIC[['industry']])
[1] "Adjusted R Squared:"
[1] 0.7532731
[1] "Number of Coefficients:"
[1] 42
[1] "VIF Check: "
[1] 1.594046
[1] "*************************"
bic_func(backwardBIC[['industry']])
[1] "Adjusted R Squared:"
[1] 0.7527448
[1] "Number of Coefficients:"
[1] 43
[1] "VIF Check: "
[1] 2.244589
[1] "*************************"
#Alternative considered: manually removing variables with a high VIF and then comparing the reduced model against the saturated model with anova(). The saturated model could not be used because some of its coefficients had high VIF scores, indicating multicollinearity. The ANOVA test also suggested that the variables removed from the reduced model were informative, so the reduced model could not be used either. Stepwise selection is therefore the preferred method for finding the best model fit.
# VIF.variables <- as.data.frame(vif(model.fulls[['industry']])) %>%
# select(VIF = `vif(model.fulls[["industry"]])`) %>%
# filter(VIF > 5) %>% rownames()
#
# industry.dummy.bc.VIF <- industry.dummy.bc %>% select(-all_of(VIF.variables))
# industry.model.VIF <- lm(Avg.bc ~ ., data = industry.dummy.bc.VIF)
# summary(industry.model.VIF)
# anova(industry.model.VIF, model.fulls[['industry']])
Best Model Selection from Stepwise
#The Best Models selected for both income and industry were forwardBIC.
# formula() extracts the fitted model's formula as a formula object. Indexing
# the stored call by position (`$call[[2]]`) depends on the formula being the
# first call argument and can return a bare language object instead.
#Income Group Dataset
income.best.formula <- formula(forwardBIC[["income"]])
income.best.formula
#Industry Group Dataset
industry.best.formula <- formula(forwardBIC[["industry"]])
industry.best.formula
Splitting the data into training and test sets (random 80/20 split with a fixed seed; the earlier year-based split, which held out 2019 as the test set, is commented out inside the function)
# Randomly split a data set into training and test parts for a given formula.
#
# Args:
#   dummy_bc: data frame containing the response and predictor columns.
#   best.formula: model formula; its first variable is taken as the response
#       and its right-hand side defines the design matrix.
#   train.frac: fraction of rows assigned to the training set (default 0.8);
#       the training size is floor(train.frac * nrow(dummy_bc)).
#   seed: value passed to set.seed() before sampling (default 0). Note that
#       this resets the session's random number generator state.
# Returns:
#   A list with X.train / X.test (design matrices without the intercept),
#   y.train / y.test (response vectors), and data.train / data.test (data
#   frames with the response as first column followed by the design columns).
test_train_split <- function(dummy_bc, best.formula, train.frac = 0.8, seed = 0) {
  # data.test <- dummy_bc %>% filter(Year == 2019)
  # data.train <- dummy_bc %>% filter(Year != 2019)
  stopifnot(
    is.numeric(train.frac), length(train.frac) == 1,
    train.frac >= 0, train.frac <= 1
  )
  response <- all.vars(best.formula)[1]
  n <- nrow(dummy_bc)
  # drop = FALSE keeps matrix shape even when only one predictor column remains.
  X <- model.matrix(best.formula, data = dummy_bc)[, -1, drop = FALSE]
  if (nrow(X) != n) {
    stop(
      "model.matrix() returned ", nrow(X), " rows for ", n,
      " input rows (missing values?); X and y would be misaligned",
      call. = FALSE
    )
  }
  # Base subsetting instead of select(): MASS, attached after dplyr in this
  # file, masks select().
  y <- as.matrix(dummy_bc[, response, drop = FALSE])
  set.seed(seed)
  train.i <- sample.int(n, size = floor(train.frac * n), replace = FALSE)
  # Explicit complement: negative indexing with an empty train.i would select
  # no rows instead of all rows.
  test.i <- setdiff(seq_len(n), train.i)
  #train
  X.train <- X[train.i, , drop = FALSE]
  y.train <- y[train.i, ]
  #test
  X.test <- X[test.i, , drop = FALSE]
  y.test <- y[test.i, ]
  data.train <- as.data.frame(cbind(y.train, X.train))
  data.test <- as.data.frame(cbind(y.test, X.test))
  colnames(data.train)[1] <- response
  colnames(data.test)[1] <- response
  list(
    "X.train" = X.train, "y.train" = y.train,
    "X.test" = X.test, "y.test" = y.test,
    "data.train" = data.train, "data.test" = data.test
  )
}
test_train_split(income_cleaned, sat.formula)
$X.train
Year NameAlternative Fuels and Electric Vehicle Recharging Property Credit NameAlternative Minimum Tax Credit NameBeer Production Credit
1422 2011 0 0 0
1017 2014 0 0 0
1860 2002 0 0 0
679 2016 0 0 0
129 2019 0 0 0
930 2015 0 0 0
1533 2011 0 0 0
471 2017 0 0 0
299 2018 0 0 0
270 2018 0 0 0
1211 2013 0 0 0
1331 2012 0 0 0
597 2016 0 0 0
1301 2012 0 0 0
1898 2001 0 0 0
1518 2011 0 0 0
330 2018 0 0 0
1799 2003 0 1 0
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15
1422 0
1017 0
1860 0
679 0
129 0
930 0
1533 0
471 0
299 0
270 0
1211 0
1331 0
597 0
1301 0
1898 0
1518 0
330 0
1799 0
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08
1422 0 0
1017 0 0
1860 0 0
679 0 0
129 0 0
930 0 0
1533 0 0
471 0 0
299 0 0
270 0 0
1211 0 0
1331 0 0
597 0 0
1301 0 0
1898 0 0
1518 0 0
330 0 0
1799 0 0
NameBrownfield Tax Credits - Remediation Real Property Tax Credit NameClean Heating Fuel Credit NameConservation Easement Tax Credit
1422 1 0 0
1017 0 0 0
1860 0 0 0
679 0 0 0
129 0 0 0
930 0 0 0
1533 0 0 0
471 0 0 0
299 0 0 0
270 0 0 0
1211 0 0 0
1331 0 0 0
597 0 0 0
1301 0 0 0
1898 0 0 0
1518 0 0 0
330 0 0 0
1799 0 0 0
NameCredit for Employment of Persons with Disabilities NameCredit for Purchase of an Automated External Defibrillator
1422 0 0
1017 0 0
1860 0 0
679 0 0
129 0 0
930 0 0
1533 0 0
471 0 0
299 0 0
270 0 0
1211 0 0
1331 0 0
597 0 0
1301 0 0
1898 0 0
1518 0 0
330 0 0
1799 0 0
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities NameEmpire State Apprentice Tax Credit
1422 0 0
1017 0 0
1860 0 0
679 0 0
129 0 0
930 0 0
1533 0 0
471 0 0
299 0 0
270 0 0
1211 0 0
1331 0 0
597 0 0
1301 0 0
1898 0 0
1518 0 0
330 0 0
1799 0 0
NameEmpire State Commercial Production Credit NameEmpire State Film Post Production Credit NameEmpire State Film Production Credit
1422 0 0 0
1017 0 0 0
1860 0 0 0
679 0 0 0
129 0 0 0
930 0 0 0
1533 0 0 0
471 0 0 0
299 0 0 0
270 0 0 0
1211 0 0 0
1331 0 0 0
597 0 0 1
1301 0 0 1
1898 0 0 0
1518 0 0 0
330 0 0 0
1799 0 0 0
NameEmpire State Musical and Theatrical Production Credit NameExcelsior Jobs Program Credit NameEZ/QEZE Tax Credits - EZ Investment Tax Credit
1422 0 0 0
1017 0 0 0
1860 0 0 0
679 0 0 0
129 0 0 0
930 0 0 0
1533 0 0 0
471 0 0 0
299 0 0 0
270 0 0 0
1211 0 0 0
1331 0 0 0
597 0 0 0
1301 0 0 0
1898 0 0 0
1518 0 0 0
330 0 0 0
1799 0 0 0
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes
1422 0 0
1017 1 0
1860 1 0
679 0 0
129 0 0
930 0 0
1533 0 0
471 0 0
299 0 0
270 0 0
1211 0 0
1331 0 1
597 0 0
1301 0 0
1898 1 0
1518 0 0
330 0 0
1799 0 0
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit
1422 0 0
1017 0 0
1860 0 0
679 0 0
129 0 0
930 0 0
1533 0 0
471 0 0
299 0 0
270 0 0
1211 0 0
1331 0 0
597 0 0
1301 0 0
1898 0 0
1518 0 0
330 0 0
1799 0 0
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners NameFarm Workforce Retention Credit NameFarmers' School Tax Credit
1422 0 0 0
1017 0 0 0
1860 0 0 0
679 0 0 0
129 0 0 0
930 0 0 0
1533 0 0 0
471 0 0 0
299 0 0 0
270 0 0 1
1211 1 0 0
1331 0 0 0
597 0 0 0
1301 0 0 0
1898 0 0 0
1518 0 0 0
330 0 0 0
1799 0 0 0
NameHire a Veteran Credit NameHistoric Properties Rehabilitation Credit NameIndustrial or Manufacturing Business Tax Credit
1422 0 0 0
1017 0 0 0
1860 0 0 0
679 0 0 0
129 0 0 0
930 0 0 0
1533 0 0 0
471 0 0 0
299 0 0 0
270 0 0 0
1211 0 0 0
1331 0 0 0
597 0 0 0
1301 0 0 0
1898 0 0 0
1518 0 0 0
330 0 0 0
1799 0 0 0
NameInvestment Tax Credit NameInvestment Tax Credit for the Financial Services Industry NameLife Sciences Research & Development Tax Credit
1422 0 0 0
1017 0 0 0
1860 0 0 0
679 0 0 0
129 0 0 0
930 0 0 0
1533 0 0 0
471 1 0 0
299 0 0 0
270 0 0 0
1211 0 0 0
1331 0 0 0
597 0 0 0
1301 0 0 0
1898 0 0 0
1518 0 1 0
330 0 0 0
1799 0 0 0
NameLong-Term Care Insurance Credit NameLow-Income Housing Credit NameMinimum Wage Reimbursement Credit NameMortgage Servicing Tax Credit
1422 0 0 0 0
1017 0 0 0 0
1860 0 0 0 0
679 1 0 0 0
129 0 0 0 0
930 0 0 0 0
1533 0 0 0 0
471 0 0 0 0
299 0 1 0 0
270 0 0 0 0
1211 0 0 0 0
1331 0 0 0 0
597 0 0 0 0
1301 0 0 0 0
1898 0 0 0 0
1518 0 0 0 0
330 0 0 0 0
1799 0 0 0 0
NameNew York Youth Jobs Program Tax Credit NameQETC Capital Tax Credit NameQETC Employment Credit
1422 0 0 0
1017 0 0 0
1860 0 0 0
679 0 0 0
129 0 0 0
930 0 0 0
1533 0 0 1
471 0 0 0
299 0 0 0
270 0 0 0
1211 0 0 0
1331 0 0 0
597 0 0 0
1301 0 0 0
1898 0 0 0
1518 0 0 0
330 1 0 0
1799 0 0 0
NameQETC Facilities, Operations, and Training Credit NameReal Property Tax Relief Credit for Manufacturing
1422 0 0
1017 0 0
1860 0 0
679 0 0
129 0 1
930 0 0
1533 0 0
471 0 0
299 0 0
270 0 0
1211 0 0
1331 0 0
597 0 0
1301 0 0
1898 0 0
1518 0 0
330 0 0
1799 0 0
NameSpecial Additional Mortgage Recording Tax Credit NameSTART-UP NY Tax Elimination Credit Group1,000,000 - 24,999,999 Group100,000 - 499,999
1422 0 0 0 0
1017 0 0 0 0
1860 0 0 0 0
679 0 0 0 0
129 0 0 0 0
930 0 1 0 0
1533 0 0 0 0
471 0 0 0 0
299 0 0 0 0
270 0 0 0 1
1211 0 0 0 0
1331 0 0 0 1
597 0 0 0 0
1301 0 0 0 0
1898 0 0 0 0
1518 0 0 0 1
330 0 0 1 0
1799 0 0 0 1
Group100,000,000 - 499,999,999 Group25,000,000 - 49,999,999 Group50,000,000 - 99,999,999 Group500,000 - 999,999 Group500,000,000 - and over
1422 0 0 0 0 0
1017 1 0 0 0 0
1860 0 0 0 0 1
679 0 0 0 1 0
129 0 0 0 0 0
930 0 0 0 0 0
1533 0 0 0 0 0
471 0 0 0 0 0
299 0 0 0 0 0
270 0 0 0 0 0
1211 0 0 1 0 0
1331 0 0 0 0 0
597 0 0 0 0 0
1301 0 0 0 0 0
1898 0 0 0 1 0
1518 0 0 0 0 0
330 0 0 0 0 0
1799 0 0 0 0 0
GroupZero or Net Loss Num
1422 1 3.000000
1017 0 1.333333
1860 0 4.000000
679 0 3.000000
129 0 69.000000
930 1 16.000000
1533 0 4.000000
471 0 96.000000
299 1 9.000000
270 0 6.500000
1211 0 1.666667
1331 0 30.000000
597 0 9.000000
1301 1 25.000000
1898 0 11.000000
1518 0 1.000000
330 0 7.000000
1799 0 27.000000
[ reached getOption("max.print") -- omitted 1518 rows ]
$y.train
[1] 1.444633e+05 8.423750e+05 7.176375e+06 4.961167e+03 3.044377e+03 1.028000e+03 6.475250e+03 7.377635e+03 1.666888e+05 1.797438e+04 9.649170e+04
[12] 1.531214e+05 1.689574e+06 9.546898e+05 6.607509e+04 3.853515e+05 1.631250e+05 8.420778e+03 1.076731e+04 4.837649e+05 2.425910e+04 1.081733e+04
[23] 2.909133e+04 3.154922e+05 1.248333e+02 2.984333e+03 1.041887e+05 1.884164e+04 1.922213e+03 8.834333e+03 1.350001e+05 9.000000e+02 1.652030e+05
[34] 2.317820e+06 9.014667e+03 7.734375e+04 2.668805e+03 1.291816e+06 1.445827e+05 1.198904e+05 4.806000e+03 1.598841e+05 2.381278e+05 1.291816e+06
[45] 5.573088e+04 1.462504e+05 1.280000e+04 2.504000e+03 1.088920e+04 1.491991e+04 1.977783e+03 1.269140e+05 6.871308e+05 1.895283e+05 2.103778e+03
[56] 1.088920e+04 3.328420e+06 1.077718e+06 2.966550e+05 2.697835e+07 8.956831e+06 5.188400e+04 1.638083e+05 2.461394e+03 6.726345e+05 2.682842e+05
[67] 3.182500e+03 7.892796e+05 3.545919e+06 1.546916e+05 8.601000e+03 1.444793e+05 3.029539e+05 7.185038e+05 3.516372e+04 1.892595e+06 1.291639e+05
[78] 3.596250e+05 4.224133e+04 1.062724e+06 4.986983e+04 1.277272e+03 2.817453e+04 4.616608e+05 7.099286e+03 2.005162e+05 1.119670e+05 9.076564e+02
[89] 2.270700e+04 1.304214e+04 9.278836e+05 5.229374e+06 5.505667e+03 9.718222e+05 3.497500e+05 3.055396e+04 1.291816e+06 8.829722e+03 1.411650e+05
[100] 1.520335e+05 2.350517e+05 2.662875e+04 2.984333e+03 2.317820e+06 2.317820e+06 3.170010e+05 3.853515e+05 8.561924e+03 4.025600e+04 9.820333e+03
[111] 6.953466e+06 3.386785e+05 2.343167e+03 2.527122e+05 1.524894e+06 1.034997e+05 3.659458e+03 1.048103e+05 9.621489e+04 1.390581e+04 8.403113e+04
[122] 1.498561e+05 7.709066e+05 6.233342e+04 5.459386e+04 6.988889e+04 2.361618e+04 1.511225e+05 2.909000e+03 1.396708e+05 6.955556e+02 6.510700e+04
[133] 1.393300e+06 4.755113e+05 4.953022e+04 2.070642e+06 2.865400e+03 7.973200e+03 1.753420e+06 2.005981e+05 4.343602e+04 9.077196e+05 6.560525e+05
[144] 2.718876e+05 2.573558e+05 4.640935e+05 4.088167e+03 1.907142e+05 8.431746e+05 8.334069e+02 4.711452e+06 2.437333e+03 5.206620e+05 1.365532e+06
[155] 2.662875e+04 4.428599e+05 1.883396e+05 1.270637e+06 7.882333e+03 1.555957e+05 5.661475e+04 1.281247e+03 1.224495e+05 4.454760e+04 1.048040e+06
[166] 1.414577e+07 2.551955e+05 4.000000e+03 2.935100e+04 1.089000e+03 7.850660e+02 1.208941e+03 1.017148e+05 8.430500e+04 8.166800e+03 7.785000e+04
[177] 9.577243e+05 1.839021e+03 8.514848e+03 1.453340e+04 1.907825e+06 1.635961e+06 1.666667e+03 4.411375e+04 1.585990e+05 5.470139e+04 9.078692e+03
[188] 2.057612e+06 5.963000e+03 3.881863e+07 1.097103e+05 2.875100e+04 1.259430e+05 1.419562e+04 3.533451e+06 3.568112e+04 8.183533e+04 9.277220e+04
[199] 8.411148e+03 1.182098e+05 2.609570e+05 6.719991e+03 9.211780e+04 7.275000e+04 1.029659e+05 8.403640e+04 4.454760e+04 3.894155e+05 1.012616e+06
[210] 2.875100e+04 2.875100e+04 6.953466e+06 2.049280e+04 8.088440e+04 6.387367e+02 1.528336e+04 4.986983e+04 3.514050e+05 1.457522e+05 1.797438e+04
[221] 4.299765e+05 1.605500e+03 1.312732e+05 1.070463e+05 1.648964e+04 8.439882e+05 1.036925e+05 3.526312e+05 4.550158e+06 2.865085e+05 7.008638e+04
[232] 1.604485e+06 3.867030e+05 5.951348e+03 1.006732e+05 1.342010e+05 1.593878e+04 3.172615e+03 2.243429e+04 6.666263e+05 6.574713e+06 2.257274e+05
[243] 4.997866e+06 2.916333e+03 4.431619e+03 9.401431e+04 4.429659e+05 1.951614e+06 1.263178e+04 2.229875e+03 3.237500e+04 1.652056e+04 4.219450e+04
[254] 7.237404e+04 3.599000e+03 8.034048e+04 1.350364e+05 1.981250e+03 8.297867e+04 1.361764e+04 1.550125e+05 1.883396e+05 2.019664e+03 9.895250e+03
[265] 9.585714e+04 7.779916e+04 9.275383e+06 6.309391e+05 5.597556e+03 7.689884e+06 2.533744e+04 7.666998e+05 3.559825e+04 6.500000e+03 4.731587e+05
[276] 2.043000e+03 3.599842e+06 1.723479e+05 5.659249e+05 2.701608e+04 1.531020e+03 7.022533e+04 1.442556e+05 9.195680e+04 2.917684e+06 4.104042e+05
[287] 1.652030e+05 2.606807e+05 7.827581e+03 2.071826e+05 2.471333e+04 1.744272e+06 4.507765e+05 6.598476e+03 5.501250e+03 3.221225e+04 7.961263e+03
[298] 1.431600e+05 1.195833e+05 2.486504e+05 1.744272e+06 9.639100e+04 3.249400e+03 4.724236e+04 3.463333e+02 2.743826e+05 1.395038e+07 4.000000e+03
[309] 1.621466e+06 4.749600e+03 8.220014e+03 6.858830e+05 9.249500e+03 1.162711e+03 6.028500e+03 1.953371e+05 1.753420e+06 7.460362e+03 8.069371e+04
[320] 2.065853e+03 1.308392e+04 6.116453e+04 9.479250e+03 1.768722e+03 4.409700e+04 7.164380e+05 1.839143e+03 2.052277e+06 8.500446e+04 2.387808e+05
[331] 1.983803e+05 6.522300e+04 8.597294e+03 1.830973e+05 1.674600e+04 9.249500e+03 8.142886e+05 1.410010e+05 1.817167e+03 5.713889e+03 1.008445e+05
[342] 1.509049e+05 1.336542e+05 2.174043e+04 2.881370e+05 1.316265e+05 1.776947e+04 1.017148e+05 5.720700e+04 2.363211e+03 8.035676e+06 3.352425e+04
[353] 1.360056e+03 1.692011e+05 1.862500e+04 2.678972e+04 3.894155e+05 3.732190e+05 1.724660e+05 7.476289e+05 9.350000e+04 3.070124e+05 2.578891e+05
[364] 2.427300e+04 5.057549e+02 7.893612e+05 1.442556e+05 2.388377e+05 1.961038e+03 4.336100e+03 6.522300e+04 2.952875e+04 3.894155e+05 1.254752e+05
[375] 5.532592e+05 1.301051e+04 2.952875e+04 2.414375e+04 3.766700e+05 7.881465e+03 2.943000e+03 5.601905e+04 7.008638e+04 1.513386e+04 1.635961e+06
[386] 6.517918e+05 1.000933e+05 1.577174e+06 3.203050e+04 8.834333e+03 2.797852e+05 3.759082e+05 7.911100e+04 9.140000e+04 2.046650e+05 1.724660e+05
[397] 2.700000e+01 3.009937e+06 3.217702e+05 2.390226e+06 2.869385e+03 2.697835e+07 4.763629e+04 2.223962e+03 9.263605e+04 8.823204e+05 1.331250e+04
[408] 1.580114e+05 3.106455e+03 3.599000e+03 7.528723e+05 5.516919e+03 4.206549e+06 7.385556e+02 6.596991e+04 3.784080e+04 6.866131e+04 2.159120e+05
[419] 8.966268e+03 7.500000e+03 1.373150e+03 2.688150e+04 2.646539e+05 8.126870e+03 5.064518e+04 2.381278e+05 5.206620e+05 4.948418e+05 3.875917e+05
[430] 1.364231e+05 2.070700e+04 2.249117e+04 1.631250e+04 1.430898e+03 2.874067e+04 1.112230e+06 5.055432e+04 5.690714e+03 8.069371e+04 3.661076e+02
[441] 3.747375e+03 3.706131e+05 3.224383e+06 1.440787e+05 1.985550e+05 8.032067e+04 2.966550e+05 3.921104e+06 1.307548e+06 2.072544e+04 1.561672e+04
[452] 4.184533e+04 4.290383e+04 3.475216e+06 2.682842e+05 9.649170e+04 5.238906e+03 8.673243e+03 8.410238e+05 3.512020e+05 9.275383e+06 4.629051e+05
[463] 2.631080e+05 1.590391e+05 7.401750e+04 1.621466e+06 1.431961e+04 7.218750e+03 4.674235e+05 2.551955e+05 1.441000e+04 1.441926e+05 2.581267e+03
[474] 1.201790e+05 2.692040e+05 2.931458e+05 1.264478e+05 1.553825e+06 3.955429e+03 6.159091e+04 1.821302e+05 3.867532e+05 1.272638e+05 3.506223e+04
[485] 2.332435e+06 9.856200e+03 4.715981e+05 9.633245e+04 4.645433e+05 3.521641e+06 1.566158e+03 8.495484e+03 2.174043e+04 5.587362e+03 3.008432e+04
[496] 8.475000e+04 7.388821e+05 2.551955e+05 8.295545e+03 6.500000e+03 9.625600e+03 9.643000e+03 7.218750e+03 2.600100e+04 9.087042e+04 2.038725e+04
[507] 5.772857e+02 6.979000e+04 1.109067e+04 4.915443e+04 7.237286e+03 1.500000e+03 2.633555e+04 1.935780e+05 4.372832e+05 6.779667e+03 1.317561e+03
[518] 3.723650e+04 8.329513e+05 1.399135e+06 1.115225e+04 4.735132e+04 1.782359e+04 4.867948e+05 4.233333e+04 5.698936e+05 1.033540e+04 2.060914e+04
[529] 3.662863e+05 4.986983e+04 3.387741e+05 7.730148e+04 1.754811e+05 6.929400e+04 3.083700e+04 9.643000e+03 1.675607e+05 1.225665e+05 8.423750e+05
[540] 2.174043e+04 8.834333e+03 6.382588e+05 1.537276e+04 1.358533e+04 2.966604e+05 1.797295e+03 2.934556e+03 4.767829e+03 1.945612e+05 1.525145e+05
[551] 8.549525e+04 1.540060e+05 2.966550e+05 2.190692e+04 3.711855e+06 1.600392e+05 2.070700e+04 3.280333e+03 1.548511e+04 7.364275e+04 1.015137e+04
[562] 4.288667e+03 7.218750e+03 1.767339e+05 1.524894e+06 4.276329e+03 1.593400e+03 1.674600e+04 3.627438e+05 1.860170e+05 6.330952e+02 9.055853e+05
[573] 3.571560e+04 2.543800e+03 3.599842e+06 6.818975e+04 1.924658e+04 7.286300e+04 1.763107e+05 2.796945e+06 4.835950e+04 5.178571e+02 5.536867e+04
[584] 1.010000e+04 6.505425e+06 6.871308e+05 1.378400e+05 3.264100e+04 9.552330e+05 7.547667e+03 2.966604e+05 1.719513e+04 6.675000e+03 5.268800e+04
[595] 4.302760e+04 8.069371e+04 3.009937e+06 1.291816e+06 3.382220e+05 1.301195e+03 6.857040e+04 4.379652e+04 4.587867e+04 1.822071e+05 6.166634e+05
[606] 1.891474e+05 6.567026e+05 1.958275e+04 4.589250e+05 6.807756e+02 4.837500e+04 5.666667e+03 5.690714e+03 4.281964e+05 6.590500e+06 9.174549e+03
[617] 1.471271e+05 1.860649e+04 1.342010e+05 5.993612e+05 2.457747e+05 2.970000e+04 3.989468e+06 9.178216e+05 1.141380e+05 4.770928e+07 1.969696e+06
[628] 3.370829e+04 1.035016e+05 1.263462e+04 4.559300e+03 5.032419e+03 1.900342e+04 8.980202e+03 6.644359e+04 2.309489e+06 5.354132e+05 7.596291e+05
[639] 1.933758e+05 3.498428e+02 4.048120e+04 1.916667e+04 2.134563e+06 1.096976e+06 5.375000e+03 1.633797e+05 5.188400e+04 4.302778e+02 1.645415e+05
[650] 5.149952e+05 4.454760e+04 9.482320e+05 7.282220e+04 4.339114e+04 3.311889e+06 4.172094e+05 6.040674e+02 3.647125e+05 1.322500e+05 3.956627e+05
[661] 7.251105e+03 2.952875e+04 1.305160e+04 7.215350e+05 1.115700e+05 1.943908e+05 8.414412e+05 1.951614e+06 1.195486e+06 2.174291e+06 6.567026e+05
[672] 1.815720e+04 1.020194e+06 4.459425e+04 2.658154e+05 2.185941e+04 7.148415e+05 3.989468e+06 7.264600e+03 3.258643e+04 1.263178e+04 9.000000e+03
[683] 2.758867e+04 5.471792e+03 1.138104e+03 3.122021e+04 2.650302e+05 1.499504e+04 6.149972e+06 5.534600e+04 3.287828e+04 2.200770e+05 1.406547e+03
[694] 2.062730e+06 1.077469e+06 2.535815e+04 1.811436e+04 1.462500e+04 1.297517e+05 5.762179e+03 3.712824e+04 1.792837e+05 2.086620e+04 3.816286e+02
[705] 9.723300e+04 1.635961e+06 1.792837e+05 7.709066e+05 1.752400e+04 8.989200e+04 1.451944e+05 1.744272e+06 2.468480e+04 3.550520e+04 7.963766e+04
[716] 7.714926e+04 1.085837e+03 1.642082e+06 6.004488e+05 1.631250e+04 8.631786e+03 4.375000e+03 9.000000e+02 2.494493e+06 3.931914e+04 1.951614e+06
[727] 7.573100e+04 4.390320e+04 7.810820e+04 3.206332e+07 2.718876e+05 4.219450e+04 3.656250e+05 6.103333e+02 6.292798e+03 4.720975e+05 8.008404e+04
[738] 2.093267e+04 5.204661e+04 6.747323e+07 2.633555e+04 1.107723e+06 1.075657e+04 1.666667e+03 2.352031e+05 1.682127e+03 1.066898e+04 4.551714e+03
[749] 9.127015e+02 8.329513e+05 4.986983e+04 6.520600e+04 1.030300e+04 1.406217e+03 9.577243e+05 6.083896e+06 7.259900e+04 1.548711e+04 1.747579e+07
[760] 3.730476e+04 4.341197e+05 2.317820e+06 1.883433e+06 9.150866e+04 2.257274e+05 4.614060e+03 7.218750e+03 1.844095e+04 9.090745e+04 8.088440e+04
[771] 6.068333e+03 7.141463e+05 4.334150e+04 1.034446e+04 5.690714e+03 1.809172e+04 4.172444e+03 5.941159e+03 1.050744e+04 2.682842e+05 6.039756e+04
[782] 3.282500e+03 4.299765e+05 2.857741e+06 1.115225e+04 3.387741e+05 2.399810e+06 9.479250e+03 1.835979e+06 1.246188e+04 1.845057e+05 6.209358e+06
[793] 1.158932e+06 9.450000e+04 2.174043e+04 2.844688e+05 2.272808e+03 4.048120e+04 1.727830e+05 4.281964e+05 1.615562e+04 1.809172e+04 5.531483e+07
[804] 1.264478e+05 4.139416e+05 2.056096e+04 1.280000e+04 9.249500e+03 3.009937e+06 1.593400e+03 5.603233e+04 1.389126e+05 4.891083e+04 3.084800e+04
[815] 8.499895e+03 7.646458e+04 2.164482e+04 1.090874e+05 2.987667e+03 3.700803e+05 4.527100e+04 8.329513e+05 2.276824e+03 5.263393e+05 6.726345e+05
[826] 1.546916e+05 1.113168e+04 2.693728e+04 1.220746e+06 2.427791e+06 5.206620e+05 4.010397e+06 3.672033e+04 6.500000e+03 5.382279e+05 5.109273e+03
[837] 3.526312e+05 3.512020e+05 1.115549e+04 1.500000e+03 2.295050e+03 2.473152e+05 6.550323e+05 1.125441e+05 4.029300e+05 3.238555e+05 1.500000e+03
[848] 1.621466e+06 1.253227e+05 4.198850e+04 2.381278e+05 1.935780e+05 6.818975e+04 1.440260e+06 4.202889e+04 2.010845e+06 4.436802e+06 2.771255e+05
[859] 5.033900e+04 6.500000e+03 2.631080e+05 2.419256e+04 1.524894e+06 3.784080e+04 2.965254e+05 6.500000e+03 2.357709e+06 1.618973e+04 7.749743e+04
[870] 9.997194e+04 2.527423e+05 3.445572e+06 3.974058e+05 5.881860e+05 3.075315e+05 7.709066e+05 1.783857e+03 7.747286e+04 1.226100e+05 2.089443e+04
[881] 3.387257e+05 1.560272e+06 6.439684e+03 2.570000e+04 3.606750e+05 2.203167e+03 7.068560e+05 1.220746e+06 6.975000e+04 3.779488e+04 7.646000e+03
[892] 7.541167e+03 5.125000e+03 2.262719e+06 1.938892e+04 2.692040e+05 6.140060e+04 1.792837e+05 4.402190e+05 1.575040e+04 7.090840e+04 3.337250e+05
[903] 5.731001e+06 2.104515e+04 1.044764e+05 8.069371e+04 7.022533e+04 5.147775e+04 2.671048e+07 3.445871e+04 3.526312e+05 1.590991e+04 1.633174e+04
[914] 4.837500e+04 1.883433e+06 3.735045e+04 7.220286e+03 8.345474e+03 2.875100e+04 8.380687e+05 1.577174e+06 4.048120e+04 3.919902e+05 7.893612e+05
[925] 4.792659e+05 3.217702e+05 1.060600e+04 2.281777e+05 1.546333e+03 4.411375e+04 2.270700e+04 7.259900e+04 1.666667e+03 5.351866e+05 2.960200e+04
[936] 1.171645e+04 2.070700e+04 1.247008e+06 1.933758e+05 1.309032e+06 3.021159e+05 2.768850e+04 2.633555e+04 1.809172e+04 2.127750e+05 4.110809e+05
[947] 2.555815e+06 7.225000e+04 2.551955e+05 2.549870e+04 3.220990e+05 1.981250e+03 3.641286e+03 4.524049e+05 2.416150e+05 3.732190e+05 3.286906e+05
[958] 3.963517e+05 2.049280e+04 2.534387e+04 9.779467e+04 6.861800e+04 2.593780e+04 2.579882e+05 9.113667e+03 1.196111e+06 4.248750e+05 5.599746e+02
[969] 5.257556e+04 4.220304e+06 9.649170e+04 1.524894e+06 2.881370e+05 7.024000e+03 1.932345e+05 2.302996e+05 3.022333e+03 1.344000e+03 2.038725e+04
[980] 2.909133e+04 8.755333e+03 7.850333e+03 4.967767e+04 2.689325e+03 1.453340e+04 3.059725e+04 1.103466e+05 5.650000e+04 8.000000e+03 4.986553e+04
[991] 2.457591e+03 1.632398e+05 7.022533e+04 2.229369e+04 4.315057e+03 2.437333e+03 1.232106e+06 4.125917e+05 3.833333e+03 2.692040e+05
[ reached getOption("max.print") -- omitted 536 entries ]
$X.test
Year NameAlternative Fuels and Electric Vehicle Recharging Property Credit NameAlternative Minimum Tax Credit NameBeer Production Credit
2 2019 0 0 0
6 2019 0 0 0
10 2019 1 0 0
12 2019 0 0 0
25 2019 0 0 0
26 2019 0 0 0
28 2019 0 0 0
30 2019 0 0 0
32 2019 0 0 0
35 2019 0 0 0
38 2019 0 0 0
50 2019 0 0 0
53 2019 0 0 0
54 2019 0 0 0
63 2019 0 0 0
70 2019 0 0 0
76 2019 0 0 0
85 2019 0 0 0
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15
2 0
6 0
10 0
12 1
25 0
26 0
28 0
30 0
32 0
35 0
38 0
50 0
53 0
54 0
63 0
70 0
76 0
85 0
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08
2 0 0
6 0 0
10 0 0
12 0 0
25 0 0
26 0 0
28 0 0
30 0 0
32 0 0
35 0 0
38 0 0
50 0 0
53 0 0
54 0 0
63 0 0
70 0 0
76 0 0
85 0 0
NameBrownfield Tax Credits - Remediation Real Property Tax Credit NameClean Heating Fuel Credit NameConservation Easement Tax Credit
2 0 0 0
6 0 0 0
10 0 0 0
12 0 0 0
25 1 0 0
26 1 0 0
28 1 0 0
30 0 1 0
32 0 1 0
35 0 0 1
38 0 0 1
50 0 0 0
53 0 0 0
54 0 0 0
63 0 0 0
70 0 0 0
76 0 0 0
85 0 0 0
NameCredit for Employment of Persons with Disabilities NameCredit for Purchase of an Automated External Defibrillator
2 0 0
6 0 0
10 0 0
12 0 0
25 0 0
26 0 0
28 0 0
30 0 0
32 0 0
35 0 0
38 0 0
50 0 0
53 0 0
54 0 0
63 0 0
70 0 0
76 0 0
85 0 0
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities NameEmpire State Apprentice Tax Credit
2 0 0
6 0 0
10 0 0
12 0 0
25 0 0
26 0 0
28 0 0
30 0 0
32 0 0
35 0 0
38 0 0
50 0 1
53 0 0
54 0 0
63 0 0
70 0 0
76 0 0
85 0 0
NameEmpire State Commercial Production Credit NameEmpire State Film Post Production Credit NameEmpire State Film Production Credit
2 0 0 0
6 0 0 0
10 0 0 0
12 0 0 0
25 0 0 0
26 0 0 0
28 0 0 0
30 0 0 0
32 0 0 0
35 0 0 0
38 0 0 0
50 0 0 0
53 0 1 0
54 0 1 0
63 0 0 0
70 0 0 0
76 0 0 0
85 0 0 0
NameEmpire State Musical and Theatrical Production Credit NameExcelsior Jobs Program Credit NameEZ/QEZE Tax Credits - EZ Investment Tax Credit
2 0 0 0
6 0 0 0
10 0 0 0
12 0 0 0
25 0 0 0
26 0 0 0
28 0 0 0
30 0 0 0
32 0 0 0
35 0 0 0
38 0 0 0
50 0 0 0
53 0 0 0
54 0 0 0
63 0 1 0
70 0 0 1
76 0 0 0
85 0 0 0
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes
2 0 0
6 0 0
10 0 0
12 0 0
25 0 0
26 0 0
28 0 0
30 0 0
32 0 0
35 0 0
38 0 0
50 0 0
53 0 0
54 0 0
63 0 0
70 0 0
76 0 1
85 0 0
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit
2 0 0
6 0 0
10 0 0
12 0 0
25 0 0
26 0 0
28 0 0
30 0 0
32 0 0
35 0 0
38 0 0
50 0 0
53 0 0
54 0 0
63 0 0
70 0 0
76 0 0
85 1 0
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners NameFarm Workforce Retention Credit NameFarmers' School Tax Credit
2 0 0 0
6 0 0 0
10 0 0 0
12 0 0 0
25 0 0 0
26 0 0 0
28 0 0 0
30 0 0 0
32 0 0 0
35 0 0 0
38 0 0 0
50 0 0 0
53 0 0 0
54 0 0 0
63 0 0 0
70 0 0 0
76 0 0 0
85 0 0 0
NameHire a Veteran Credit NameHistoric Properties Rehabilitation Credit NameIndustrial or Manufacturing Business Tax Credit
2 0 0 0
6 0 0 0
10 0 0 0
12 0 0 0
25 0 0 0
26 0 0 0
28 0 0 0
30 0 0 0
32 0 0 0
35 0 0 0
38 0 0 0
50 0 0 0
53 0 0 0
54 0 0 0
63 0 0 0
70 0 0 0
76 0 0 0
85 0 0 0
NameInvestment Tax Credit NameInvestment Tax Credit for the Financial Services Industry NameLife Sciences Research & Development Tax Credit
2 0 0 0
6 0 0 0
10 0 0 0
12 0 0 0
25 0 0 0
26 0 0 0
28 0 0 0
30 0 0 0
32 0 0 0
35 0 0 0
38 0 0 0
50 0 0 0
53 0 0 0
54 0 0 0
63 0 0 0
70 0 0 0
76 0 0 0
85 0 0 0
NameLong-Term Care Insurance Credit NameLow-Income Housing Credit NameMinimum Wage Reimbursement Credit NameMortgage Servicing Tax Credit
2 0 0 0 0
6 0 0 0 0
10 0 0 0 0
12 0 0 0 0
25 0 0 0 0
26 0 0 0 0
28 0 0 0 0
30 0 0 0 0
32 0 0 0 0
35 0 0 0 0
38 0 0 0 0
50 0 0 0 0
53 0 0 0 0
54 0 0 0 0
63 0 0 0 0
70 0 0 0 0
76 0 0 0 0
85 0 0 0 0
NameNew York Youth Jobs Program Tax Credit NameQETC Capital Tax Credit NameQETC Employment Credit
2 0 0 0
6 0 0 0
10 0 0 0
12 0 0 0
25 0 0 0
26 0 0 0
28 0 0 0
30 0 0 0
32 0 0 0
35 0 0 0
38 0 0 0
50 0 0 0
53 0 0 0
54 0 0 0
63 0 0 0
70 0 0 0
76 0 0 0
85 0 0 0
NameQETC Facilities, Operations, and Training Credit NameReal Property Tax Relief Credit for Manufacturing
2 0 0
6 0 0
10 0 0
12 0 0
25 0 0
26 0 0
28 0 0
30 0 0
32 0 0
35 0 0
38 0 0
50 0 0
53 0 0
54 0 0
63 0 0
70 0 0
76 0 0
85 0 0
NameSpecial Additional Mortgage Recording Tax Credit NameSTART-UP NY Tax Elimination Credit Group1,000,000 - 24,999,999 Group100,000 - 499,999
2 0 0 0 0
6 0 0 0 0
10 0 0 0 0
12 0 0 0 0
25 0 0 0 1
26 0 0 1 0
28 0 0 0 0
30 0 0 0 0
32 0 0 0 1
35 0 0 0 0
38 0 0 1 0
50 0 0 0 0
53 0 0 0 0
54 0 0 0 0
63 0 0 0 0
70 0 0 0 1
76 0 0 0 0
85 0 0 0 0
Group100,000,000 - 499,999,999 Group25,000,000 - 49,999,999 Group50,000,000 - 99,999,999 Group500,000 - 999,999 Group500,000,000 - and over
2 0 0 0 0 0
6 0 0 1 0 0
10 1 0 0 0 0
12 0 0 0 1 0
25 0 0 0 0 0
26 0 0 0 0 0
28 1 0 0 0 0
30 0 0 0 0 0
32 0 0 0 0 0
35 0 0 0 0 0
38 0 0 0 0 0
50 1 0 0 0 0
53 0 0 0 0 0
54 0 0 0 0 1
63 0 0 0 1 0
70 0 0 0 0 0
76 0 0 0 0 0
85 0 0 0 0 1
GroupZero or Net Loss Num
2 0 3.000000
6 0 1.000000
10 0 1.666667
12 0 2.000000
25 0 1.166667
26 0 1.166667
28 0 1.166667
30 1 266.000000
32 0 17.000000
35 1 1.000000
38 0 1.000000
50 0 1.333333
53 0 4.000000
54 0 4.000000
63 0 4.000000
70 0 1.428571
76 1 9.000000
85 0 3.000000
[ reached getOption("max.print") -- omitted 367 rows ]
$y.test
[1] 6.270533e+04 4.094677e+05 2.200770e+05 1.642082e+06 2.718876e+05 2.718876e+05 2.718876e+05 1.851951e+03 1.965588e+03 1.081733e+04 1.081733e+04
[12] 9.350000e+04 9.852682e+05 9.852682e+05 2.479582e+05 9.275383e+06 1.382386e+07 8.166333e+04 7.521833e+03 2.025000e+04 1.007273e+05 4.288667e+03
[23] 1.441000e+04 8.690525e+04 3.529140e+04 2.250933e+04 4.368017e+04 4.166496e+05 1.673048e+03 1.224760e+04 4.033988e+05 3.009937e+06 4.557862e+06
[34] 4.436802e+06 4.436802e+06 1.951052e+05 1.951052e+05 1.951052e+05 1.787933e+03 8.000000e+03 8.000000e+03 1.666667e+04 4.000000e+03 6.979000e+04
[45] 4.864249e+05 9.061236e+07 6.567026e+05 1.759552e+06 3.175900e+04 4.133590e+05 2.931458e+05 1.220746e+06 6.656878e+03 1.369802e+05 2.073250e+03
[56] 3.282500e+03 3.723650e+04 4.621417e+04 5.147775e+04 5.147775e+04 1.855775e+04 1.806176e+05 4.334150e+04 1.883396e+05 2.134563e+06 9.031245e+05
[67] 4.711452e+06 4.711452e+06 3.081000e+03 6.779667e+03 6.500000e+03 1.922176e+06 3.533451e+06 1.017148e+05 4.867948e+05 9.827525e+04 8.861392e+05
[78] 2.233330e+06 1.980965e+05 7.893612e+05 7.384467e+04 2.122267e+04 4.305500e+04 1.724660e+05 8.312500e+03 1.960654e+05 1.895372e+05 1.860170e+05
[89] 3.550520e+04 1.305918e+05 6.510700e+04 8.475000e+04 1.031893e+05 5.188400e+04 3.699730e+05 2.996425e+04 2.499283e+05 2.499283e+05 1.291816e+06
[100] 8.127092e+05 1.951614e+06 8.069371e+04 1.280000e+04 3.500000e+03 4.811593e+07 4.589250e+05 1.376282e+05 1.182098e+05 2.509321e+07 5.141760e+04
[111] 5.141760e+04 3.721344e+06 1.470486e+04 1.983803e+05 1.430607e+04 1.763827e+04 4.517570e+06 4.089729e+04 9.892656e+04 3.295417e+05 2.128814e+03
[122] 4.961167e+03 2.698137e+06 2.855810e+03 6.957592e+05 2.126993e+04 4.464000e+04 4.464000e+04 2.070700e+04 6.312500e+04 7.685714e+04 7.275000e+04
[133] 1.282233e+04 7.973200e+03 2.796945e+06 2.796945e+06 6.827385e+05 2.551955e+05 4.540254e+05 9.525000e+03 7.385556e+02 1.358533e+04 1.500000e+03
[144] 3.333333e+04 3.526312e+05 1.474218e+06 9.883970e+04 2.386651e+05 2.229698e+06 2.980568e+04 2.288953e+06 1.600152e+06 6.753602e+05 5.273845e+04
[155] 7.810820e+04 6.680000e+02 2.724733e+04 3.663453e+04 1.151319e+04 2.249117e+04 8.826000e+03 2.317820e+06 7.643536e+04 9.920675e+04 3.795877e+05
[166] 1.977606e+04 2.685822e+03 5.883905e+03 4.441692e+03 6.359594e+04 5.514430e+05 4.741105e+04 7.193025e+04 1.508482e+06 1.949050e+05 1.256800e+04
[177] 9.014667e+03 1.652667e+03 3.240708e+05 1.636045e+05 2.471333e+04 6.068333e+03 6.068333e+03 9.000000e+02 5.000000e+03 3.393300e+04 3.027754e+07
[188] 7.564780e+04 4.128836e+04 5.670456e+04 7.903272e+05 1.647877e+06 3.055000e+04 8.423750e+05 1.090874e+05 5.378571e+02 5.922277e+05 1.030300e+04
[199] 1.030300e+04 7.008638e+04 8.010614e+06 5.874554e+04 4.891083e+04 2.551117e+04 4.063938e+03 6.703886e+04 7.177040e+04 3.691667e+04 1.589467e+04
[210] 5.795531e+05 1.384138e+04 2.983178e+05 2.181643e+05 2.181643e+05 8.051457e+05 2.916333e+03 2.916333e+03 8.333333e+02 8.050171e+04 2.341759e+06
[221] 1.389300e+04 1.713608e+05 4.281964e+05 4.281964e+05 4.230000e+05 9.592945e+04 3.683733e+05 5.060070e+04 1.954157e+05 5.990749e+04 5.321884e+05
[232] 9.649170e+04 1.147545e+04 1.756816e+04 2.333804e+05 3.212752e+06 1.815720e+04 1.574442e+03 1.585990e+05 1.585990e+05 7.039875e+03 1.817167e+03
[243] 1.817167e+03 2.631080e+05 1.289623e+04 6.081111e+03 3.878656e+05 2.794458e+05 4.938133e+04 3.230000e+02 5.501250e+03 1.000000e+03 2.150442e+06
[254] 8.666300e+04 2.096018e+06 1.358255e+05 3.224167e+05 3.778690e+04 5.412605e+05 5.962975e+04 1.917436e+04 4.290383e+04 4.290383e+04 1.040245e+04
[265] 1.753420e+06 1.498433e+05 7.791730e+04 1.792837e+05 1.611846e+03 2.671333e+03 3.334000e+03 6.916667e+03 1.546916e+05 2.985750e+04 2.985750e+04
[276] 3.387741e+05 2.966550e+05 1.349429e+03 7.541167e+03 8.333333e+02 1.010000e+04 1.010000e+04 3.784080e+04 5.916069e+05 2.844688e+05 3.728435e+05
[287] 4.763364e+05 7.853154e+05 4.732412e+05 8.977273e+02 1.459739e+05 3.917140e+05 1.365532e+06 2.915842e+04 2.545867e+03 6.675000e+03 1.722175e+04
[298] 1.943908e+05 1.280000e+04 3.376638e+04 1.113726e+04 2.633555e+04 2.633555e+04 1.021064e+05 3.337250e+05 1.081118e+05 2.625417e+05 1.027671e+05
[309] 7.064006e+04 5.689091e+05 8.933342e+04 2.871402e+02 2.261608e+06 1.386061e+05 1.708555e+04 7.812500e+04 7.760417e+04 1.885399e+04 4.555419e+05
[320] 1.936990e+05 8.720815e+04 4.926557e+05 1.508330e+05 3.732776e+05 1.915909e+05 2.266036e+04 6.687111e+04 3.218191e+05 1.345040e+06 7.565226e+02
[331] 8.422389e+05 1.752836e+04 2.224450e+05 1.359291e+05 2.777950e+04 1.545613e+05 3.869246e+05 3.329819e+04 2.464650e+05 5.268800e+04 1.926750e+04
[342] 5.268800e+04 1.532851e+04 1.761740e+05 2.385813e+05 2.343167e+03 8.239147e+04 1.094581e+05 1.978800e+05 2.772085e+03 1.410062e+04 1.104294e+04
[353] 1.374426e+04 2.240877e+05 2.792584e+03 4.481963e+04 5.703256e+05 1.823070e+05 3.808170e+05 2.901302e+07 6.624115e+04 3.456612e+04 6.230926e+02
[364] 2.262116e+05 1.965676e+05 2.200508e+05 1.833129e+06 2.837499e+03 5.828539e+02 4.791057e+07 3.064511e+04 2.634839e+05 1.531092e+04 4.959974e+03
[375] 5.110326e+05 3.635190e+03 5.675850e+04 4.368009e+05 1.648528e+07 4.285965e+04 1.161933e+04 3.644922e+05 9.586857e+03 5.011701e+05 2.681130e+06
$data.train
$data.test
NA
Lasso regression for comparison to Forward Stepwise
# calc_MSE <- function (model, x.test, y.test){
# y.predict <- predict(model, newdata = x.test)
# return(mean((y.predict - y.test)^2))
# }
# xy.splits <- list('income' = income.data.split,
# 'industry' = industry.data.split,
# 'income.sat' = income.data.split.sat,
# 'industry.sat' = industry.data.split.sat)
#
# lasso.list <- c('income', 'industry', 'income.sat', 'industry.sat')
# baseline.list <- c('income.sat', 'industry.sat', 'income.sat', 'industry.sat')
# ridge.alpha <- 0
#
# for (a in 1:4){
# i = lasso.list[a]
# b = baseline.list[a]
# X.train <- xy.splits[[i]][['X.train']]
# y.train <- xy.splits[[i]][['y.train']]
# X.test <- xy.splits[[i]][['X.test']]
# y.test <- xy.splits[[i]][['y.test']]
# X.test.sat <- xy.splits[[b]][['X.test']]
# y.test.sat <- xy.splits[[b]][['y.test']]
# data.train <- xy.splits[[i]][['data.train']]
# data.test <- xy.splits[[i]][['data.test']]
#
# #create lambda grid
# lambda.grid = 10^seq(2, -5, length = 100)
#
# #create lasso models with lambda.grid
# lasso.models = glmnet(X.train, y.train, alpha = ridge.alpha, lambda = lambda.grid)
#
# #visualize coefficient shrinkage
# plot(lasso.models, xvar = "lambda", label = TRUE, main = paste("Lasso Regression:", i))
#
# #Cross Validation to find best lambda
# set.seed(0)
# cv.lasso.models <- cv.glmnet(X.train, y.train, alpha = ridge.alpha, lambda = lambda.grid, nfolds = 10)
#
# #visualize cross validation for lambda that minimizes the mean squared error.
# plot(cv.lasso.models, main = paste("Lasso Regression:", i))
#
# #Checking the best lambda
# log(cv.lasso.models$lambda.min)
# best.lambda <- cv.lasso.models$lambda.min
# print(paste(i, ' best.lambda:', best.lambda))
# # best lambda with all the variables was found to be 0.0006892612
# # best lambda with only the bwdBIC coefficients included was found to be 0.0003053856
#
# #looking at the lasso coefficients for the best.lambda
# best.lambda.coeff <- predict(lasso.models, s = best.lambda, type = "coefficients")
# print('Number of Coefficients:')
# print(dim(best.lambda.coeff)[1])
#
# #fitting a model with the best lambda found to be 0.000689 and using it to make predictions for the testing data.
# lasso.best.lambda.train.pred <- predict(lasso.models, s = best.lambda, newx = X.test)
# lasso.best.lambda.train.pred
#
# #checking MSE
# MSE.lasso <- mean((lasso.best.lambda.train.pred - y.test)^2)
# sat.model.bc <- lm(Avg.bc ~., data = data.train)
#
#
# temp.df <- as.data.frame(X.test.sat) #temp fix
# colnames(temp.df) <- str_replace_all(colnames(temp.df), "[`]", '')
#
# y.predict <- predict(sat.model.bc, newdata = temp.df)
# MSE.sat <- mean((y.predict - y.test.sat)^2)
#
# print(paste(i, ' Lasso MSE: ', MSE.lasso, ' ', b, ' Saturated MSE: ', MSE.sat))
#
# metrics <- eval_results(y.test, lasso.best.lambda.train.pred, data.test)
# print(metrics)
# print('********************************')
# }
Function to show metrics (R^2 and MSE) for Regularization (Ridge/Lasso)
#all.dfs <- list(income_cleaned, income_cleaned_bc, income.dummy.bc)
# Model formula to fit for each dataset variant, keyed by split name.
formulas <- list(
  income_cleaned = sat.formula,
  income_cleaned_bc = sat.formula.bc,
  income.data.split.sat = sat.formula.bc,
  income.data.split.best = income.best.formula,
  industry_cleaned = sat.formula,
  industry_cleaned_bc = sat.formula.bc,
  industry.data.split.sat = sat.formula.bc,
  industry.data.split.best = industry.best.formula
)

# Train/test split for every variant. The data frames below are listed in the
# same order as `formulas`, so Map() pairs each data frame with its formula by
# position and carries the list names over to the result.
all.splits <- Map(
  test_train_split,
  list(
    income_cleaned = income_cleaned,
    income_cleaned_bc = income_cleaned_bc,
    income.data.split.sat = income.dummy.bc,
    income.data.split.best = income.dummy.bc,
    industry_cleaned = industry_cleaned,
    industry_cleaned_bc = industry_cleaned_bc,
    industry.data.split.sat = industry.dummy.bc,
    industry.data.split.best = industry.dummy.bc
  ),
  formulas
)
# Accumulators for the per-split scores; each pass of the loop appends one
# entry to every vector.
no.reg.r2 <- NULL
lasso.reg.r2 <- NULL
ridge.reg.r2 <- NULL

for (i in names(all.splits)) {
  # Baseline: ordinary least squares fitted on the training portion.
  m <- lm(formulas[[i]], data = all.splits[[i]][["data.train"]])
  # no.reg.r2 <- c(no.reg.r2, summary(m)$adj.r.squared)

  # Score the baseline on the held-out rows via eval_results().
  y.predict <- predict(
    m,
    newdata = as.data.frame(all.splits[[i]][["X.test"]])
  )
  adj.R2 <- eval_results(
    all.splits[[i]][["y.test"]],
    y.predict,
    all.splits[[i]][["data.test"]],
    length(coef(m))
  )["Rsquare"]
  no.reg.r2 <- c(no.reg.r2, adj.R2)

  # Same split scored with regularization_func(): second argument 1 = lasso.
  lasso.reg.r2 <- c(lasso.reg.r2, regularization_func(all.splits[[i]], 1, i))

  # Second argument 0 = ridge.
  ridge.reg.r2 <- c(ridge.reg.r2, regularization_func(all.splits[[i]], 0, i))
}
Warning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' values
[1] "Dimensions of the Alpha: 1 Regression Coefficients for: income_cleaned"
[1] 54
Warning: collapsing to unique 'x' values
[1] "Dimensions of the Alpha: 0 Regression Coefficients for: income_cleaned"
[1] 54
Warning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' values
Warning: collapsing to unique 'x' values
[1] "Dimensions of the Alpha: 1 Regression Coefficients for: income_cleaned_bc"
[1] 54
Warning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' values
Warning: collapsing to unique 'x' values
[1] "Dimensions of the Alpha: 0 Regression Coefficients for: income_cleaned_bc"
[1] 54
Warning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' values
Warning: collapsing to unique 'x' values
[1] "Dimensions of the Alpha: 1 Regression Coefficients for: income.data.split.sat"
[1] 54
Warning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' values
Warning: collapsing to unique 'x' values
[1] "Dimensions of the Alpha: 0 Regression Coefficients for: income.data.split.sat"
[1] 54
Warning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' values
Warning: collapsing to unique 'x' values
[1] "Dimensions of the Alpha: 1 Regression Coefficients for: income.data.split.best"
[1] 41
Warning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' values
Warning: collapsing to unique 'x' values
[1] "Dimensions of the Alpha: 0 Regression Coefficients for: income.data.split.best"
[1] 41
Warning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' values
Warning: collapsing to unique 'x' values
[1] "Dimensions of the Alpha: 1 Regression Coefficients for: industry_cleaned"
[1] 65
Warning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' values
Warning: collapsing to unique 'x' values
[1] "Dimensions of the Alpha: 0 Regression Coefficients for: industry_cleaned"
[1] 65
Warning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' values
Warning: collapsing to unique 'x' values
[1] "Dimensions of the Alpha: 1 Regression Coefficients for: industry_cleaned_bc"
[1] 65
Warning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' values
Warning: collapsing to unique 'x' values
[1] "Dimensions of the Alpha: 0 Regression Coefficients for: industry_cleaned_bc"
[1] 65
Warning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' values
Warning: collapsing to unique 'x' values
[1] "Dimensions of the Alpha: 1 Regression Coefficients for: industry.data.split.sat"
[1] 65
Warning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' values
Warning: collapsing to unique 'x' values
[1] "Dimensions of the Alpha: 0 Regression Coefficients for: industry.data.split.sat"
[1] 65
Warning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' values
Warning: collapsing to unique 'x' values
[1] "Dimensions of the Alpha: 1 Regression Coefficients for: industry.data.split.best"
[1] 42
Warning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' valuesWarning: collapsing to unique 'x' values
Warning: collapsing to unique 'x' values
[1] "Dimensions of the Alpha: 0 Regression Coefficients for: industry.data.split.best"
[1] 42
# Put the three score vectors side by side, one row per entry of all.splits.
df.rsquare <- as.data.frame(
  cbind(
    noReg = no.reg.r2,
    lassoReg = lasso.reg.r2,
    ridgeReg = ridge.reg.r2
  )
)
row.names(df.rsquare) <- names(all.splits)
df.rsquare
# sat.model.bc <- lm(Avg.bc ~., data = xy.splits.sat[['industry']][['data.train']])
#
# temp.df <- as.data.frame(xy.splits.sat[['industry']][['X.test']])
#
# colnames(temp.df)[57]
#
# colnames(temp.df) <- str_replace_all(colnames(temp.df), "[`]", '')
# colnames(temp.df)
#
# y.predict <- predict(sat.model.bc, newdata = temp.df)
# MSE.sat <- mean((y.predict - xy.splits.sat[['industry']][['y.test']])^2)
#
# print(paste(i, ' Lasso MSE: ', MSE.lasso, ' Saturated MSE: ', MSE.sat))
#
# summary(sat.model.bc)$coefficients
# as.data.frame(xy.splits.sat[['industry']][['X.test']]) %>% select(starts_with('`GroupOth'))
# Compute the adjusted R^2 of a set of predictions.
#
# Arguments:
#   true      - observed response values.
#   predicted - predicted response values, aligned element-wise with `true`.
#   df        - data frame whose row count is used as the sample size n.
#   p         - parameter count used in the degrees-of-freedom correction
#               (the residual sum of squares is divided by n - p - 1).
#
# Returns a length-one numeric vector named "Rsquare" holding
#   1 - (RSS / (n - p - 1)) / (TSS / (n - 1)).
eval_results <- function(true, predicted, df, p) {
  n <- nrow(df)
  adj.RSS <- sum((predicted - true)^2) / (n - p - 1)
  adj.TSS <- sum((true - mean(true))^2) / (n - 1)
  adj.R_square <- 1 - adj.RSS / adj.TSS
  # RMSE = sqrt(RSS/n)
  # Return the value computed above. Returning a bare `R_square` (never
  # assigned in this function) would fail or pick up an unrelated global.
  c(Rsquare = adj.R_square)
}
# Formula for adjusted R^2 taken from the following link:
#https://www.graphpad.com/guides/prism/latest/curve-fitting/reg_adjusted-r-squared.htm
# Show the lasso predictions next to (pred * lambda.bc + 1)^(1 / lambda.bc)
# (presumably undoing a Box-Cox transform with parameter lambda.bc -- confirm).
as.data.frame(lasso.best.lambda.train.pred) %>%
  mutate(
    Avg_in_dollars =
      (lasso.best.lambda.train.pred * lambda.bc + 1)^(1 / lambda.bc)
  )
# calc_MSE() for sat.model.bc on the X.test / y.test parts of the income split.
calc_MSE(
  sat.model.bc,
  as.data.frame(income.sat.data.split[["X.test"]]),
  income.sat.data.split[["y.test"]]
)
[1] 1.473686